From 3c5ed996ac94a15bc2929155f2c69cc85eef89f7 Mon Sep 17 00:00:00 2001 From: "James K. Lowden" Date: Thu, 6 Mar 2025 16:25:09 -0500 Subject: [PATCH] COBOL: Frontend gcc/cobol/ * LICENSE: New file. * Make-lang.in: New file. * config-lang.in: New file. * lang.opt: New file. * lang.opt.urls: New file. * cbldiag.h: New file. * cdfval.h: New file. * cobol-system.h: New file. * copybook.h: New file. * dts.h: New file. * exceptg.h: New file. * gengen.h: New file. * genmath.h: New file. * genutil.h: New file. * inspect.h: New file. * lang-specs.h: New file. * lexio.h: New file. * parse_ante.h: New file. * parse_util.h: New file. * scan_ante.h: New file. * scan_post.h: New file. * show_parse.h: New file. * structs.h: New file. * symbols.h: New file. * token_names.h: New file. * util.h: New file. * cdf-copy.cc: New file. * lexio.cc: New file. * scan.l: New file. * parse.y: New file. * genapi.cc: New file. * genapi.h: New file. * gengen.cc: New file. * genmath.cc: New file. * genutil.cc: New file. * cdf.y: New file. * cobol1.cc: New file. * convert.cc: New file. * except.cc: New file. * gcobolspec.cc: New file. * structs.cc: New file. * symbols.cc: New file. * symfind.cc: New file. * util.cc: New file. * gcobc: New file. * gcobol.1: New file. * gcobol.3: New file. * help.gen: New file. * udf/stored-char-length.cbl: New file. --- gcc/cobol/LICENSE | 29 + gcc/cobol/Make-lang.in | 366 + gcc/cobol/cbldiag.h | 111 + gcc/cobol/cdf-copy.cc | 356 + gcc/cobol/cdf.y | 956 ++ gcc/cobol/cdfval.h | 113 + gcc/cobol/cobol-system.h | 64 + gcc/cobol/cobol1.cc | 692 + gcc/cobol/config-lang.in | 38 + gcc/cobol/convert.cc | 78 + gcc/cobol/copybook.h | 205 + gcc/cobol/dts.h | 109 + gcc/cobol/except.cc | 370 + gcc/cobol/exceptg.h | 61 + gcc/cobol/gcobc | 465 + gcc/cobol/gcobol.1 | 1628 +++ gcc/cobol/gcobol.3 | 328 + gcc/cobol/gcobolspec.cc | 694 + gcc/cobol/genapi.cc | 16926 +++++++++++++++++++++++++ gcc/cobol/genapi.h | 587 + gcc/cobol/gengen.cc | 3462 +++++ gcc/cobol/gengen.h | 544 + gcc/cobol/genmath.cc | 1730 +++ gcc/cobol/genmath.h | 36 + gcc/cobol/genutil.cc | 2642 ++++ gcc/cobol/genutil.h | 168 + gcc/cobol/help.gen | 15 + gcc/cobol/inspect.h | 237 + gcc/cobol/lang-specs.h | 47 + gcc/cobol/lang.opt | 144 + gcc/cobol/lang.opt.urls | 29 + gcc/cobol/lexio.cc | 1878 +++ gcc/cobol/lexio.h | 294 + gcc/cobol/parse.y | 13107 +++++++++++++++++++ gcc/cobol/parse_ante.h | 3552 ++++++ gcc/cobol/parse_util.h | 478 + gcc/cobol/scan.l | 2487 ++++ gcc/cobol/scan_ante.h | 745 ++ gcc/cobol/scan_post.h | 401 + gcc/cobol/show_parse.h | 523 + gcc/cobol/structs.cc | 333 + gcc/cobol/structs.h | 62 + gcc/cobol/symbols.cc | 4881 +++++++ gcc/cobol/symbols.h | 2210 ++++ gcc/cobol/symfind.cc | 611 + gcc/cobol/token_names.h | 1373 ++ gcc/cobol/udf/stored-char-length.cbl | 15 + gcc/cobol/util.cc | 2310 ++++ gcc/cobol/util.h | 49 + 49 files changed, 68539 insertions(+) create mode 100644 gcc/cobol/LICENSE create mode 100644 gcc/cobol/Make-lang.in create mode 100644 gcc/cobol/cbldiag.h create mode 100644 gcc/cobol/cdf-copy.cc create mode 100644 gcc/cobol/cdf.y create mode 100644 gcc/cobol/cdfval.h create mode 100644 gcc/cobol/cobol-system.h create mode 100644 gcc/cobol/cobol1.cc create mode 100644 gcc/cobol/config-lang.in create mode 100644 gcc/cobol/convert.cc create mode 100644 gcc/cobol/copybook.h create mode 100644 gcc/cobol/dts.h create mode 100644 gcc/cobol/except.cc create mode 100644 gcc/cobol/exceptg.h create mode 100755 gcc/cobol/gcobc create mode 100644 gcc/cobol/gcobol.1 create mode 100644 gcc/cobol/gcobol.3 create mode 100644 gcc/cobol/gcobolspec.cc create mode 100644 gcc/cobol/genapi.cc create mode 100644 gcc/cobol/genapi.h create mode 100644 gcc/cobol/gengen.cc create mode 100644 gcc/cobol/gengen.h create mode 100644 gcc/cobol/genmath.cc create mode 100644 gcc/cobol/genmath.h create mode 100644 gcc/cobol/genutil.cc create mode 100644 gcc/cobol/genutil.h create mode 100755 gcc/cobol/help.gen create mode 100644 gcc/cobol/inspect.h create mode 100644 gcc/cobol/lang-specs.h create mode 100644 gcc/cobol/lang.opt create mode 100644 gcc/cobol/lang.opt.urls create mode 100644 gcc/cobol/lexio.cc create mode 100644 gcc/cobol/lexio.h create mode 100644 gcc/cobol/parse.y create mode 100644 gcc/cobol/parse_ante.h create mode 100644 gcc/cobol/parse_util.h create mode 100644 gcc/cobol/scan.l create mode 100644 gcc/cobol/scan_ante.h create mode 100644 gcc/cobol/scan_post.h create mode 100644 gcc/cobol/show_parse.h create mode 100644 gcc/cobol/structs.cc create mode 100644 gcc/cobol/structs.h create mode 100644 gcc/cobol/symbols.cc create mode 100644 gcc/cobol/symbols.h create mode 100644 gcc/cobol/symfind.cc create mode 100644 gcc/cobol/token_names.h create mode 100644 gcc/cobol/udf/stored-char-length.cbl create mode 100644 gcc/cobol/util.cc create mode 100644 gcc/cobol/util.h diff --git a/gcc/cobol/LICENSE b/gcc/cobol/LICENSE new file mode 100644 index 00000000000..aa5ba6024cc --- /dev/null +++ b/gcc/cobol/LICENSE @@ -0,0 +1,29 @@ +######################################################################### +# +# Copyright (c) 2021-2025 Symas Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of the Symas Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/gcc/cobol/Make-lang.in b/gcc/cobol/Make-lang.in new file mode 100644 index 00000000000..8cc837eabf3 --- /dev/null +++ b/gcc/cobol/Make-lang.in @@ -0,0 +1,366 @@ +# Top level -*- makefile -*- fragment for Cobol +# Copyright (C) 2021-2025 Free Software Foundation, Inc. + +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# This file provides the language dependent support in the main Makefile. +# Each language makefile fragment must provide the following targets: +# +# foo.all.cross, foo.start.encap, foo.rest.encap, +# foo.install-common, foo.install-man, foo.install-info, foo.install-pdf, +# foo.install-html, foo.info, foo.dvi, foo.pdf, foo.html, foo.uninstall, +# foo.mostlyclean, foo.clean, foo.distclean, +# foo.maintainer-clean, foo.stage1, foo.stage2, foo.stage3, foo.stage4 +# +# where `foo' is the name of the language. +# +# It should also provide rules for: +# +# - making any compiler driver (eg: g++) +# - the compiler proper (eg: cc1plus) +# - define the names for selecting the language in LANGUAGES. + +gcobol_INSTALL_NAME := $(shell echo gcobol|sed '$(program_transform_name)') +gcobol_TARGET_INSTALL_NAME := $(target_noncanonical)-$(shell echo gcobol|sed '$(program_transform_name)') + +cobol: cobol1$(exeext) +.PHONY: cobol + +BINCLUDE ?= ./gcc +LIB_INCLUDE ?= $(srcdir)/../libgcobol +LIB_SOURCE ?= $(srcdir)/../libgcobol + +# +# At this point, as of 2022-10-21, CPPFLAGS is an empty string and can be +# altered. CFLAGS and CXXFLAGS are being established upstream, and thus +# cannot, at this point, be changed. +# +# Note further that we are producing only a 64-bit version of libgcobol.so, so +# it is safe to hard-code the lib64 location. This obviously has to match the +# installation code in libgcobol/Makefile.in +# +CPPFLAGS = \ + -std=c++14 \ + -Iinclude \ + -I$(BINCLUDE) \ + -I$(LIB_INCLUDE) \ + -DEXEC_LIB=\"$(prefix)/lib64\" \ + $(END) + +YFLAGS = -Werror -Wmidrule-values -Wno-yacc \ + --debug --verbose + +LFLAGS = -d -Ca + +# +# These are the object files for creating the cobol1.exe compiler: +# +cobol1_OBJS = \ + cobol/cdf.o \ + cobol/cdf-copy.o \ + cobol/cobol1.o \ + cobol/convert.o \ + cobol/except.o \ + cobol/genutil.o \ + cobol/genapi.o \ + cobol/genmath.o \ + cobol/gengen.o \ + cobol/lexio.o \ + cobol/parse.o \ + cobol/scan.o \ + cobol/structs.o \ + cobol/symbols.o \ + cobol/symfind.o \ + cobol/util.o \ + cobol/charmaps.o \ + cobol/valconv.o \ + $(END) + +# +# There is source code in libgcobol/charmaps.cc and +# libgcobol/valconv.cc that needs to be compiled into both libgcobol +# and cobol1. We copy those two source code files from libgcobol to +# here to avoid the nightmare of one file appearing in more than one +# place. For simplicity, we make those compilations dependent on all +# of the libgcobol/*.h files, which might lead to the occasional +# unnecessary compilation. The impact of that is negligible. +# +cobol/charmaps.cc: $(LIB_SOURCE)/charmaps.cc + cp $^ $@ + +cobol/valconv.cc: $(LIB_SOURCE)/valconv.cc + cp $^ $@ + +LIB_SOURCE_H=$(wildcard $(LIB_SOURCE)/*.h) + +cobol/charmaps.o: cobol/charmaps.cc $(LIB_SOURCE_H) + +cobol/valconv.o: cobol/valconv.cc $(LIB_SOURCE_H) + +# +# These are the object files for creating the gcobol.exe "driver" +# +GCOBOL_D_OBJS = $(GCC_OBJS) cobol/gcobolspec.o + +# +# These get combined to provide a dependency relationship that ensures all +# of the "generated-files" are generated before we need them. See the root +# Makefile.in code that looks like this: +# ALL_HOST_FRONTEND_OBJS = $(foreach v,$(CONFIG_LANGUAGES),$($(v)_OBJS)) +# +cobol_OBJS = \ + $(cobol1_OBJS) \ + cobol/gcobolspec.o \ + $(END) + +# +# Frankly, I can't figure out what this does: +# +CFLAGS-cobol/gcobolspec.o += $(DRIVER_DEFINES) + +# +# This controls the build of the gcobol.exe "driver" +# +gcobol$(exeext): \ + $(GCOBOL_D_OBJS) \ + $(EXTRA_GCC_OBJS) \ + libcommon-target.a \ + $(LIBDEPS) + +$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \ + $(GCOBOL_D_OBJS) $(EXTRA_GCC_OBJS) libcommon-target.a \ + $(EXTRA_GCC_LIBS) $(LIBS) + +# +# These control the build of the cobol1.exe source-to-GENERIC converter +# + +# First, files needed for parsing: + +cobol/parse.c: cobol/parse.y + $(BISON) -o $@ $(YFLAGS) \ + --defines=cobol/parse.h \ + --report-file=cobol/parser.out $< + +cobol/cdf.c: cobol/cdf.y + $(BISON) -o $@ $(YFLAGS) \ + --defines=cobol/cdf.h --report-file=cobol/cdf.out $< + +# See "Trailing context is getting confused with trailing optional patterns" +# in Flex manual. We suppress those messages, as a convenience. +FLEX_WARNING = warning, dangerous trailing context + +cobol/scan.c: cobol/scan.l + $(FLEX) -o$@ $(LFLAGS) $< >$@~ 2>&1 + awk '! /$(FLEX_WARNING)/ {print > "/dev/stderr"; nerr++} \ + END {print "$(FLEX):", NR, "messages" > "/dev/stderr"; \ + exit nerr}' $@~ + @rm $@~ + + +# To establish prerequisites for parse.o, cdf.o, and scan.o, +# 1. capture the "make -n" output +# 2. eliminate compiler options, leaving only preprocessor options (-D and -I) +# 3. add -E -MM +# +# The below lists of include files for the the generated files is +# postprocessed: the files are one per line, used "realpath +# --relative-to=$PWD" to rationalize them, and sorted. We include +# parse.c in the list for scan.o because that's the one make(1) knows about. + +cobol/cdf.o: cobol/cdf.c \ + $(srcdir)/cobol/cbldiag.h \ + $(srcdir)/cobol/cdfval.h \ + $(srcdir)/cobol/copybook.h \ + $(srcdir)/cobol/exceptg.h \ + $(srcdir)/cobol/symbols.h \ + $(srcdir)/cobol/util.h \ + $(srcdir)/../libgcobol/common-defs.h \ + $(srcdir)/../libgcobol/ec.h \ + $(srcdir)/../libgcobol/exceptl.h + +cobol/parse.o: cobol/parse.c \ + $(srcdir)/cobol/cbldiag.h \ + $(srcdir)/cobol/cdfval.h \ + $(srcdir)/cobol/cobol-system.h \ + $(srcdir)/cobol/exceptg.h \ + $(srcdir)/cobol/genapi.h \ + $(srcdir)/cobol/inspect.h \ + $(srcdir)/cobol/parse_ante.h \ + $(srcdir)/cobol/parse_util.h \ + $(srcdir)/cobol/symbols.h \ + $(srcdir)/cobol/util.h \ + $(srcdir)/hwint.h \ + $(srcdir)/system.h \ + $(srcdir)/../include/ansidecl.h \ + $(srcdir)/../include/filenames.h \ + $(srcdir)/../include/hashtab.h \ + $(srcdir)/../include/libiberty.h \ + $(srcdir)/../include/safe-ctype.h \ + $(srcdir)/../libgcobol/common-defs.h \ + $(srcdir)/../libgcobol/ec.h \ + $(srcdir)/../libgcobol/exceptl.h \ + $(srcdir)/../libgcobol/io.h \ + auto-host.h \ + config.h + +cobol/scan.o: cobol/scan.c \ + $(srcdir)/cobol/cbldiag.h \ + $(srcdir)/cobol/cdfval.h \ + $(srcdir)/cobol/cobol-system.h \ + $(srcdir)/cobol/copybook.h \ + $(srcdir)/cobol/dts.h \ + $(srcdir)/cobol/exceptg.h \ + $(srcdir)/cobol/inspect.h \ + $(srcdir)/cobol/lexio.h \ + $(srcdir)/cobol/scan_ante.h \ + $(srcdir)/cobol/scan_post.h \ + $(srcdir)/cobol/symbols.h \ + $(srcdir)/cobol/util.h \ + $(srcdir)/hwint.h \ + $(srcdir)/system.h \ + $(srcdir)/../include/ansidecl.h \ + $(srcdir)/../include/filenames.h \ + $(srcdir)/../include/hashtab.h \ + $(srcdir)/../include/libiberty.h \ + $(srcdir)/../include/safe-ctype.h \ + $(srcdir)/../libgcobol/common-defs.h \ + $(srcdir)/../libgcobol/ec.h \ + $(srcdir)/../libgcobol/exceptl.h \ + $(srcdir)/../libgcobol/io.h \ + auto-host.h \ + config.h \ + cobol/cdf.c \ + cobol/parse.c + +# +# The src targets are executed if +# ‘--enable-generated-files-in-srcdir’ was specified as a configure +# option. +# +# srcextra copies generated dependencies into the source +# directory. This is used for files such as Flex/Bison output: files +# that are not version-controlled but should be included in any +# release tarballs. +# +# Although versioned snapshots require Flex to be installed, they do +# not require Bison. Release tarballs always include Flex/Bison +# output, and do not require those tools to be installed. +# +cobol.srcextra: cobol/parse.c cobol/cdf.c cobol/scan.c + ln -f $^ cobol/parse.h cobol/cdf.h $(srcdir)/cobol/ + + +# And the cobol1.exe front end + +cobol1$(exeext): $(cobol1_OBJS) $(BACKEND) $(LIBDEPS) attribs.o + +$(LLINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) attribs.o -o $@ \ + $(cobol1_OBJS) $(BACKEND) $(LIBS) $(BACKENDLIBS) + +# FIXME +cobol.all.cross: + +cobol.start.encap: gcobol$(exeext) + +cobol.rest.encap: + +cobol.install-common: installdirs + $(INSTALL_PROGRAM) gcobol$(exeext) $(DESTDIR)$(bindir)/ + $(INSTALL_PROGRAM) cobol1$(exeext) $(DESTDIR)$(libexecsubdir)/ + $(INSTALL) -m 755 $(srcdir)/cobol/gcobc $(DESTDIR)$(bindir)/ + mkdir -p $(DESTDIR)$(datadir)/gcobol/udf + $(INSTALL_DATA) $(srcdir)/cobol/udf/* $(DESTDIR)$(datadir)/gcobol/udf/ + +cobol.install-man: installdirs + $(INSTALL_DATA) $(srcdir)/cobol/gcobol.1 $(DESTDIR)$(man1dir)/ + $(INSTALL_DATA) $(srcdir)/cobol/gcobol.3 $(DESTDIR)$(man3dir)/ + +cobol.install-info: + +cobol.install-pdf: installdirs gcobol.pdf gcobol-io.pdf + mkdir -p $(DESTDIR)$(datadir)/gcobol/pdf + $(INSTALL_DATA) gcobol.pdf gcobol-io.pdf $(DESTDIR)$(pdfdir)/ + +cobol.install-plugin: + +cobol.install-html: installdirs gcobol.html gcobol-io.html + $(INSTALL_DATA) gcobol.html gcobol-io.html $(DESTDIR)$(htmldir)/ + +cobol.info: +cobol.srcinfo: + +cobol.dvi: +cobol.srcdvi: + +cobol.pdf: gcobol.pdf gcobol-io.pdf +cobol.srcpdf: gcobol.pdf gcobol-io.pdf + ln $^ $(srcdir)/cobol/ + +gcobol.pdf: $(srcdir)/cobol/gcobol.1 + groff -mdoc -T pdf $^ > $@~ + @mv $@~ $@ +gcobol-io.pdf: $(srcdir)/cobol/gcobol.3 + groff -mdoc -T pdf $^ > $@~ + @mv $@~ $@ + +cobol.html: gcobol.html gcobol-io.html +cobol.srchtml: gcobol.html gcobol-io.html + ln $^ $(srcdir)/cobol/ + +gcobol.html: $(srcdir)/cobol/gcobol.1 + mandoc -T html $^ > $@~ + @mv $@~ $@ +gcobol-io.html: $(srcdir)/cobol/gcobol.3 + mandoc -T html $^ > $@~ + @mv $@~ $@ + +# "make uninstall" is not expected to work. It's not clear how to name +# the installed location of the cobol1 compiler. +cobol.uninstall: + rm -rf $(DESTDIR)$(bindir)/$(gcobol_INSTALL_NAME)$(exeext) \ + $(DESTDIR)$(bindir)/gcobc \ + $(DESTDIR)$(datadir)/gcobol/ \ + $(DESTDIR)$(man1dir)/gcobol.1 \ + $(DESTDIR)$(man3dir)/gcobol.3 + +cobol.man: +cobol.srcman: + +cobol.mostlyclean: + +cobol.clean: + rm -fr gcobol cobol1 cobol/* \ + ../*/libgcobol/* + +cobol.distclean: + +cobol.maintainer-clean: + +# The main makefile has already created stage?/cobol. +cobol.stage1: stage1-start + -mv cobol/*$(objext) stage1/cobol +cobol.stage2: stage2-start + -mv cobol/*$(objext) stage2/cobol +cobol.stage3: stage3-start + -mv cobol/*$(objext) stage3/cobol +cobol.stage4: stage4-start + -mv cobol/*$(objext) stage4/cobol +cobol.stageprofile: stageprofile-start + -mv cobol/*$(objext) stageprofile/cobol +cobol.stagefeedback: stagefeedback-start + -mv cobol/*$(objext) stagefeedback/cobol + +selftest-cobol: diff --git a/gcc/cobol/cbldiag.h b/gcc/cobol/cbldiag.h new file mode 100644 index 00000000000..ed754f1203e --- /dev/null +++ b/gcc/cobol/cbldiag.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _CBLDIAG_H +#pragma message __FILE__ " included twice" +#else +#define _CBLDIAG_H + +const char * cobol_filename(); + +/* + * These are user-facing messages. They go through the gcc + * diagnostic framework and use text that can be localized. + */ +void yyerror( const char fmt[], ... ); +bool yywarn( const char fmt[], ... ); + +/* Location type. Borrowed from parse.h as generated by Bison. */ +#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED +typedef struct YYLTYPE YYLTYPE; +struct YYLTYPE +{ + int first_line; + int first_column; + int last_line; + int last_column; +}; +# define YYLTYPE_IS_DECLARED 1 +# define YYLTYPE_IS_TRIVIAL 1 + +const YYLTYPE& cobol_location(); +#endif + +#if ! defined YDFLTYPE && ! defined YDFLTYPE_IS_DECLARED +typedef struct YDFLTYPE YDFLTYPE; +struct YDFLTYPE +{ + int first_line; + int first_column; + int last_line; + int last_column; +}; +# define YDFLTYPE_IS_DECLARED 1 +# define YDFLTYPE_IS_TRIVIAL 1 + +#endif + +// an error at a location, called from the parser for semantic errors +void error_msg( const YYLTYPE& loc, const char gmsgid[], ... ); + +void dialect_error( const YYLTYPE& loc, const char term[], const char dialect[] ); + + +// for CDF and other warnings that refer back to an earlier line +// (not in diagnostic framework yet) +void yyerrorvl( int line, const char *filename, const char fmt[], ... ); + +void cbl_unimplementedw(const char *gmsgid, ...); // warning +void cbl_unimplemented(const char *gmsgid, ...); // error +void cbl_unimplemented_at( const YYLTYPE& loc, const char *gmsgid, ... ); + +/* + * dbgmsg produce messages not intended for the user. They cannot + * be localized and fwrite directly to standard out. dbgmsg is activated by + * -fflex-debug or -fyacc-debug. + */ +void dbgmsg( const char fmt[], ... ); + +void gcc_location_set( const YYLTYPE& loc ); + +// tree.h defines yy_flex_debug as a macro because options.h +#if ! defined(yy_flex_debug) +template +static void +location_dump( const char func[], int line, const char tag[], const LOC& loc) { + extern int yy_flex_debug; + if( yy_flex_debug && getenv("update_location") ) + fprintf(stderr, "%s:%d: %s location (%d,%d) to (%d,%d)\n", + func, line, tag, + loc.first_line, loc.first_column, loc.last_line, loc.last_column); +} +#endif // defined(yy_flex_debug) + +#endif diff --git a/gcc/cobol/cdf-copy.cc b/gcc/cobol/cdf-copy.cc new file mode 100644 index 00000000000..dfa3f57315d --- /dev/null +++ b/gcc/cobol/cdf-copy.cc @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// NOTE: Unlike charmaps-copy.cc and valprint-copy.cc, this file implements +// the Compiler Directives Facility for the COBOL "COPY" statement. So, this +// file is the actual source code, and not a copy of something in libgcobol +// +// We regret any confusion engendered. + +#include "cobol-system.h" +#include "cbldiag.h" +#include "util.h" +#include "copybook.h" + +#include +#include + +#define COUNT_OF(X) (sizeof(X) / sizeof(X[0])) + +/* + * There are 3 kinds of replacement types: + * 1. keywords, identifiers, figurative constants, and function names + * 2. string literals + * 3. pseudo-text + * + * Types #1 and #3 are delimited by separators: + * [[:space:],.;()]. String literals begin and end with ["] or ['] + * (matched). + * + * Space in pseudo-text is "elastic"; one or more in the matching + * argument matches one or more in the input. Exception: when the + * argument is only a comma or semicolon, it matches exactly. + * + * The matching algorithm operates on the source file word by word. + * Comments are copied literally, as are any CDF statements. + * + * The candidate word is used as the beginning of all possible + * matches, in the order they appear in the COPY statement. If none + * match, the word is copied to the output and the next word is + * tried. + * + * On a match, the replacement is applied, the result copied to the + * output, and the next word is tried, starting again from the first + * match candidate. + * + * The parser composes the regular expressions. It "literalizes" + * any regex metacharacters that may appear in the COPY text and + * constructs the correct matching expression for "stretchable" + * space. This function only applies them. + */ + +extern int yydebug; +const char * cobol_filename(); +bool is_fixed_format(); +bool is_reference_format(); + +struct line_t { + char *p, *pend; + line_t( size_t len, char *data ) : p(data), pend(data + len) { + gcc_assert(p && p <= pend); + } + line_t( char *data, char *eodata ) : p(data), pend(eodata) { + gcc_assert(p && p <= pend); + } + ssize_t size() const { return pend - p; } +}; + +static bool +is_separator_space( const char *p) { + switch( *p ) { + case ',': + case ';': + if( p[1] == 0x20 ) return true; + break; + } + return ISSPACE(*p); +} + +static void +verify_bounds( size_t pos, size_t size, const char input[] ) { + gcc_assert(pos < size ); + if( !( pos < size) ) { + cbl_internal_error( "REPLACING %zu characters exceeds system capacity" + "'%s'", pos, input); + } +} + +/* + * Replace any separators in the copybook's REPLACING candidate with + * "stretchable" space. Escape any regex metacharacters in candidate. + * + * "For matching purposes, each occurrence of a separator comma, a + * separator semicolon, or a sequence of one or more separator spaces + * is considered to be a single space." + * + * If the indicator column is column 7 and is a 'D', we treat that as + * a SPACE for the purposes of matching a COPY REPLACING or REPLACE + * directive. + */ +const char * +esc( size_t len, const char input[] ) { + static char spaces[] = "([,;]?[[:space:]])+"; + static char spaceD[] = "(\n {6}D" "|" "[,;]?[[:space:]])+"; + static char buffer[64 * 1024]; + char *p = buffer; + const char *eoinput = input + len; + + const char *spacex = is_reference_format()? spaceD : spaces; + + for( const char *s=input; *s && s < eoinput; s++ ) { + *p = '\0'; + verify_bounds( 4 + size_t(p - buffer), sizeof(buffer), buffer ); + switch(*s) { + case '^': case '$': + case '(': case ')': + case '*': case '+': case '?': + case '[': case ']': + case '{': case '}': + case '|': + case '.': + *p++ = '\\'; + *p++ = *s; + break; + case '\\': + *p++ = '['; + *p++ = *s; + *p++ = ']'; + break; + + case ';': case ',': + if( ! (s+1 < eoinput && s[1] == 0x20) ) { + *p++ = *s; + break; + } + __attribute__((fallthrough)); + case 0x20: case '\n': + verify_bounds( (p + sizeof(spacex)) - buffer, sizeof(buffer), buffer ); + p = stpcpy( p, spacex ); + while( s+1 < eoinput && is_separator_space(s+1) ) { + s++; + } + break; + default: + *p++ = *s; + break; + } + } + *p = '\0'; + +#if 0 + dbgmsg("%s:%d: regex '%s'", __func__, __LINE__, buffer); +#endif + return buffer; // caller must strdup static buffer +} + +static int +glob_error(const char *epath, int eerrno) { + dbgmsg("%s: COPY file search: '%s': %s", __func__, epath, xstrerror(eerrno)); + return 0; +} + +void +copybook_directory_add( const char gcob_copybook[] ) { + if( !gcob_copybook ) return; + char *directories = xstrdup(gcob_copybook), *p = directories; + char *eodirs = strchr(directories, '\0'); + gcc_assert(eodirs); + + do { + char *pend = std::find(p, eodirs, ':'); + if( pend != eodirs ) { + *pend = '\0'; + } + copybook.directory_add(p); + p = pend; + } while( ++p < eodirs ); + +} + +class case_consistent { + int lower_upper; // -1 lower, 1 upper +public: + case_consistent() : lower_upper(0) {} + bool operator()( char ch ) { + if( !ISALPHA(ch) ) return true; + int lu = ISLOWER(ch)? -1 : 1; + if( !lower_upper ) { + lower_upper = lu; + return true; + } + return lu == lower_upper; + } +}; + +void +copybook_extension_add( const char ext[] ) { + char *alt = NULL; + bool one_case = std::all_of( ext, ext + strlen(ext), case_consistent() ); + if( one_case ) { + alt = xstrdup(ext); + gcc_assert(alt); + auto convert = ISLOWER(ext[0])? toupper : tolower; + std::transform( alt, alt+strlen(alt), alt, convert ); + } + copybook.extensions_add( ext, alt ); +} + +extern int yydebug; + +const char * copybook_elem_t::extensions; + +void +copybook_t::extensions_add( const char ext[], const char alt[] ) { + char *output; + if( alt ) { + output = xasprintf("%s,%s", ext, alt); + } else { + output = xstrdup(ext); + } + gcc_assert(output); + if( book.extensions ) { + char *s = xasprintf("%s,%s", output, book.extensions); + free(const_cast(book.extensions)); + free(output); + book.extensions = s; + } else { + book.extensions = output; + } +} + +static inline ino_t +inode_of( int fd ) { + struct stat sb; + if( -1 == fstat(fd, &sb) ) { + cbl_err("could not stat fd %d", fd); + } + return sb.st_ino; +} + +int +copybook_elem_t::open_file( const char directory[], bool literally ) { + int erc; + char *pattern, *copier = xstrdup(cobol_filename()); + if( ! directory ) { + directory = dirname(copier); + if( 0 == strcmp(".", directory) ) directory = NULL; + } + + char *path = NULL; + + if( directory || library.name ) { + if( directory && library.name ) { + path = xasprintf( "%s/%s/%s", directory, library.name, source.name ); + } else { + const char *dir = directory? directory : library.name; + path = xasprintf( "%s/%s", dir, source.name ); + } + } else { + path = xasprintf( "%s", source.name ); + } + + gcc_assert(path); + + if( literally ) { + dbgmsg("copybook_elem_t::open_file: trying %s", path); + + if( (this->fd = open(path, O_RDONLY)) == -1 ) { + dbgmsg("could not open %s: %m", path); + return fd; + } + this->source.name = path; + if( ! cobol_filename(this->source.name, inode_of(fd)) ) { + error_msg(source.loc, "recursive copybook: '%s' includes itself", path); + (void)! close(fd); + fd = -1; + } + return fd; + } + gcc_assert( ! literally ); + + if( extensions ) { + pattern = xasprintf("%s{,.cpy,.CPY,.cbl,.CBL,.cob,.COB,%s}", + path, this->extensions); + } else { + pattern = xasprintf("%s{,.cpy,.CPY,.cbl,.CBL,.cob,.COB}", path); + } + + free(copier); + + static int flags = GLOB_MARK | GLOB_BRACE | GLOB_TILDE; + glob_t globber; + + if( (erc = glob(pattern, flags, glob_error, &globber)) != 0 ) { + switch(erc) { + case GLOB_NOSPACE: + yywarn("COPY file search: out of memory"); + break; + case GLOB_ABORTED: + yywarn("COPY file search: read error"); + break; + case GLOB_NOMATCH: + dbgmsg("COPY '%s': no files match %s", this->source.name, pattern); + default: + break; // caller says no file found + } + return -1; + } + + free(pattern); + + for( size_t i=0; i < globber.gl_pathc; i++ ) { + auto filename = globber.gl_pathv[i]; + if( (this->fd = open(filename, O_RDONLY)) != -1 ) { + dbgmsg("found copybook file %s", filename); + this->source.name = xstrdup(filename); + if( ! cobol_filename(this->source.name, inode_of(fd)) ) { + error_msg(source.loc, "recursive copybook: '%s' includes itself", this->source); + (void)! close(fd); + fd = -1; + } + globfree(&globber); + return fd; + } + } + yywarn("could not open copy source for '%s'", source); + + globfree(&globber); + return -1; +} diff --git a/gcc/cobol/cdf.y b/gcc/cobol/cdf.y new file mode 100644 index 00000000000..08b53412b26 --- /dev/null +++ b/gcc/cobol/cdf.y @@ -0,0 +1,956 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +%{ + +#include "cobol-system.h" +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "copybook.h" +#include "exceptl.h" +#include "exceptg.h" + +#define COUNT_OF(X) (sizeof(X) / sizeof(X[0])) + +copybook_t copybook; + +static inline bool +is_word( int c ) { + return c == '_' || ISALNUM(c); +} + +static std::pair +integer_literal( const char input[] ) { + long long v; + int n; + bool fOK = 1 == sscanf(input, "%lld%n", &v, &n) && + n == (int)strlen(input); + return std::make_pair(v, fOK); +} + +/* "The renamed symbols include 'yyparse', 'yylex', 'yyerror', + 'yynerrs', 'yylval', 'yylloc', 'yychar' and 'yydebug'. [...] The + renamed macros include 'YYSTYPE', 'YYLTYPE', and 'YYDEBUG'" */ + +extern int yylineno, yyleng; +extern char *yytext; + +static int ydflex(void); + +#define PROGRAM current_program_index() + +const YYLTYPE& cobol_location(); +static YYLTYPE location_set( const YYLTYPE& loc ); +void input_file_status_notify(); + +#define YYLLOC_DEFAULT(Current, Rhs, N) \ + do { \ + if (N) \ + { \ + (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ + location_dump("cdf.c", N, \ + "rhs N ", YYRHSLOC (Rhs, N)); \ + } \ + else \ + { \ + (Current).first_line = \ + (Current).last_line = YYRHSLOC (Rhs, 0).last_line; \ + (Current).first_column = \ + (Current).last_column = YYRHSLOC (Rhs, 0).last_column; \ + } \ + location_dump("cdf.c", __LINE__, "current", (Current)); \ + input_file_status_notify(); \ + gcc_location_set( location_set(Current) ); \ + } while (0) + +%} + +%code requires { + #include "cdfval.h" + + using std::map; + + static map dictionary; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" + static bool + cdfval_add( const char name[], + const cdfval_t& value, bool override = false ) + { + if( scanner_parsing() ) { + if( ! override ) { + if( dictionary.find(name) != dictionary.end() ) return false; + } + dictionary[name] = value; + } + return true; + } + static void + cdfval_off( const char name[] ) { + if( scanner_parsing() ) { + auto p = dictionary.find(name); + if( p == dictionary.end() ) { + dictionary[name] = cdfval_t(); + } + dictionary[name].off = true; + } + } +#pragma GCC diagnostic pop + + bool operator==( const cdfval_base_t& lhs, int rhs ); + bool operator||( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + bool operator&&( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + + cdfval_t operator<( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator<=( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator==( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator!=( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator>=( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator>( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator+( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator-( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator*( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t operator/( const cdfval_base_t& lhs, const cdfval_base_t& rhs ); + cdfval_t negate( cdfval_base_t lhs ); + +} + +%{ +static char *display_msg; +const char * keyword_str( int token ); + +static class exception_turns_t { + typedef std::list filelist_t; + typedef std::map ec_filemap_t; + ec_filemap_t exceptions; + public: + bool enabled, location; + + exception_turns_t() : enabled(false), location(false) {}; + + const ec_filemap_t& exception_files() const { return exceptions; } + + struct args_t { + size_t nexception; + cbl_exception_files_t *exceptions; + }; + + bool add_exception( ec_type_t type, const filelist_t files = filelist_t() ) { + ec_disposition_t disposition = ec_type_disposition(type); + if( disposition != ec_implemented(disposition) ) { + cbl_unimplementedw("CDF: exception '%s'", ec_type_str(type)); + } + auto elem = exceptions.find(type); + if( elem != exceptions.end() ) return false; // cannot add twice + + exceptions[type] = files; + return true; + } + + args_t args() const { + args_t args; + args.nexception = exceptions.size(); + args.exceptions = NULL; + if( args.nexception ) { + args.exceptions = new cbl_exception_files_t[args.nexception]; + } + std::transform( exceptions.begin(), exceptions.end(), args.exceptions, + []( auto& input ) { + cbl_exception_files_t output; + output.type = input.first; + output.nfile = input.second.size(); + output.files = NULL; + if( output.nfile ) { + output.files = new size_t[output.nfile]; + std::copy(input.second.begin(), + input.second.end(), + output.files ); + } + return output; + } ); + return args; + } + + void clear() { + for( auto& ex : exceptions ) { + ex.second.clear(); + } + exceptions.clear(); + enabled = location = false; + } + +} exception_turns; + + +static bool +apply_cdf_turn( exception_turns_t& turns ) { + for( auto elem : turns.exception_files() ) { + std::set files(elem.second.begin(), elem.second.end()); + enabled_exceptions.turn_on_off(turns.enabled, + turns.location, + elem.first, files); + } + if( getenv("SHOW_PARSE") ) enabled_exceptions.dump(); + return true; +} +%} + +%union { + bool boolean; + int number; + const char *string; + cdf_arg_t cdfarg; + cdfval_base_t cdfval; + cbl_file_t *file; + std::set *files; +} + +%printer { fprintf(yyo, "'%s'", $$ ); } +%printer { fprintf(yyo, "%s '%s'", + keyword_str($$.token), + $$.string? $$.string : "" ); } +%printer { fprintf(yyo, "%ld '%s'", + $$.number, $$.string? $$.string : "" ); } + +%type NAME NUMSTR LITERAL PSEUDOTEXT +%type LSUB RSUB SUBSCRIPT +%type namelit name_any name_one +%type name subscript subscripts inof +%token BOOL +%token FEATURE 363 NUMBER 302 EXCEPTION_NAME 280 "EXCEPTION NAME" + +%type cdf_expr +%type cdf_relexpr cdf_reloper cdf_and cdf_bool_expr +%type cdf_factor +%type cdf_cond_expr override + +%type filename +%type filenames + +%token BY 476 +%token COPY 360 +%token CDF_DISPLAY 382 ">>DISPLAY" +%token IN 595 +%token NAME 286 +%token NUMSTR 304 "numeric literal" +%token OF 676 +%token PSEUDOTEXT 711 +%token REPLACING 733 +%token LITERAL 297 +%token SUPPRESS 374 + +%token LSUB 365 "(" +%token SUBSCRIPT 373 RSUB 370 ")" + +%token CDF_DEFINE 381 ">>DEFINE" +%token CDF_IF 383 ">>IF" +%token CDF_ELSE 384 ">>ELSE" +%token CDF_END_IF 385 ">>END-IF" +%token CDF_EVALUATE 386 ">>EVALUATE" +%token CDF_WHEN 387 ">>WHEN" +%token CDF_END_EVALUATE 388 ">>END-EVALUATE" + +%token AS 458 CONSTANT 359 DEFINED 361 +%type DEFINED +%token OTHER 688 PARAMETER_kw 366 "PARAMETER" +%token OFF 677 OVERRIDE 367 +%token THRU 929 +%token TRUE_kw 803 "True" + +%token CALL_COBOL 389 "CALL" +%token CALL_VERBATIM 390 "CALL (as C)" + +%token TURN 805 CHECKING 486 LOCATION 639 ON 679 WITH 831 + +%left OR 930 +%left AND 931 +%right NOT 932 +%left '<' '>' '=' NE 933 LE 934 GE 935 +%left '-' '+' +%left '*' '/' +%right NEG 937 + +%define api.prefix {ydf} +%define api.token.prefix{YDF_} + +%locations +%define parse.error verbose +%% +top: partials { YYACCEPT; } + | copy '.' + { + const char *library = copybook.library(); + if( !library ) library = "SYSLIB"; + const char *source = copybook.source(); + dbgmsg("COPY %s from %s", source, library); + YYACCEPT; + } + | copy error { + error_msg(@error, "COPY directive must end in a '.'"); + YYACCEPT; + } + | completes { YYACCEPT; } + ; + +completes: complete + | completes complete + | completes partial + ; +complete: cdf_define + | cdf_display + | cdf_turn + | cdf_call_convention + ; + + /* + * To do: read ISO 2022 to see how >>DISPLAY is dictionary! + * To do: DISPLAY UPON + * To do: decide what to do about newlines, and when; DISPLAY has + * {}... in the specification. + */ +cdf_display: CDF_DISPLAY strings { + if( scanner_parsing() ) { + fprintf(stderr, "%s\n", display_msg); + free(display_msg); + display_msg = NULL; + } + } + ; +strings: LITERAL { + display_msg = xstrdup($1); + } + | strings LITERAL { + char *p = display_msg; + display_msg = xasprintf("%s %s", p, $2); + free(p); + } + ; + +partials: partial + { + if( ! scanner_parsing() ) YYACCEPT; + } + | partials partial + { + if( ! scanner_parsing() ) YYACCEPT; + } + ; +partial: cdf_if /* text */ + | CDF_ELSE { scanner_parsing_toggle(); } + | CDF_END_IF { scanner_parsing_pop(); } + | cdf_evaluate /* text */ + | cdf_eval_when /* text */ + | CDF_END_EVALUATE { scanner_parsing_pop(); } + ; + +cdf_define: CDF_DEFINE cdf_constant NAME as cdf_expr[value] override + { + if( keyword_tok($NAME) ) { + error_msg(@NAME, "%s is a COBOL keyword", $NAME); + YYERROR; + } + if( !cdfval_add( $NAME, cdfval_t($value), $override) ) { + error_msg(@NAME, "name already in dictionary: %s", $NAME); + const cdfval_t& entry = dictionary[$NAME]; + if( entry.filename ) { + error_msg(@NAME, "%s previously defined in %s:%d", + $NAME, entry.filename, entry.lineno); + } else { + error_msg(@NAME, "%s was defined on the command line", $NAME); + } + YYERROR; + } + } + | CDF_DEFINE cdf_constant NAME '=' cdf_expr[value] override + { /* accept, but as error */ + if( scanner_parsing() ) { + error_msg(@NAME, "CDF error: %s = value invalid", $NAME); + } + } + | CDF_DEFINE cdf_constant NAME as OFF + { + cdfval_off( $NAME); + } + | CDF_DEFINE cdf_constant NAME as PARAMETER_kw override + /* + * "If the PARAMETER phrase is specified, the value referenced + * by compilation-variable-name-1 is obtained from the + * operating environment by an implementor-defined method...." + * It's a noop for us, because parameters defined with -D are + * available regardless. + */ + { + if( 0 == dictionary.count($NAME) ) { + yywarn("CDF: '%s' is defined AS PARAMETER " + "but was not defined", $NAME); + } + } + | CDF_DEFINE FEATURE as ON { + auto feature = cbl_gcobol_feature_t($2); + if( ! cobol_gcobol_feature_set(feature, true) ) { + error_msg(@FEATURE, ">>DEFINE %EBCDIC-MODE is invalid within program body"); + } + } + | CDF_DEFINE FEATURE as OFF { + auto feature = cbl_gcobol_feature_t($2); + if( ! cobol_gcobol_feature_set(feature, false) ) { + error_msg(@FEATURE, ">>DEFINE %EBCDIC-MODE is invalid within program body"); + } + } + ; +cdf_constant: %empty + | CONSTANT + ; +override: %empty { $$ = false; } + | OVERRIDE { $$ = true; } + ; + +cdf_turn: TURN except_names except_check + { + apply_cdf_turn(exception_turns); + exception_turns.clear(); + } + ; + +cdf_call_convention: + CALL_COBOL { + current_call_convention(cbl_call_cobol_e); + } + | CALL_VERBATIM { + current_call_convention(cbl_call_verbatim_e); + } + ; + + +except_names: except_name + | except_names except_name + ; +except_name: EXCEPTION_NAME[ec] { + assert($ec != ec_none_e); + exception_turns.add_exception(ec_type_t($ec)); + } + | EXCEPTION_NAME[ec] filenames { + assert($ec != ec_none_e); + std::list files; + std::copy( $filenames->begin(), $filenames->end(), + std::back_inserter(files) ); + exception_turns.add_exception(ec_type_t($ec), files); + } + ; + +except_check: CHECKING on { exception_turns.enabled = true; } + | CHECKING OFF { exception_turns.enabled = false; } + | CHECKING on with LOCATION + { + exception_turns.enabled = exception_turns.location = true; + } + ; + +filenames: filename { + $$ = new std::set; + $$->insert(symbol_index(symbol_elem_of($1))); + } + | filenames filename { + $$ = $1; + auto inserted = $$->insert(symbol_index(symbol_elem_of($2))); + if( ! inserted.second ) { + error_msg(@2, "%s: No file-name shall be specified more than " + " once for one exception condition", $filename->name); + } + } + ; +filename: NAME + { + struct symbol_elem_t *e = symbol_file(PROGRAM, $1); + if( !(e && e->type == SymFile) ) { + error_msg(@NAME, "invalid file name '%s'", $NAME); + YYERROR; + } + $$ = cbl_file_of(e); + } + ; + +cdf_if: CDF_IF cdf_cond_expr { + scanner_parsing(YDF_CDF_IF, $2); + } + | CDF_IF error { + ////if( scanner_parsing() ) yyerrok; + } CDF_END_IF { // not pushed, don't pop + if( ! scanner_parsing() ) YYACCEPT; + } + ; + +cdf_evaluate: CDF_EVALUATE cdf_expr + | CDF_EVALUATE TRUE_kw + ; + +cdf_eval_when: CDF_WHEN cdf_eval_obj + ; + +cdf_eval_obj: cdf_cond_expr + | cdf_expr THRU cdf_expr + | OTHER + ; + +cdf_cond_expr: BOOL + | NAME DEFINED[maybe] + { + auto p = dictionary.find($1); + bool found = p != dictionary.end(); + if( !$maybe ) found = ! found; + if( ! found ) { + $$ = !$2; + dbgmsg("CDF: %s not found in dictionary (result %s)", + $1, $$? "true" : "false"); + } else { + $$ = $2; + dbgmsg("CDF: %s found in dictionary (result %s)", + $1, $$? "true" : "false"); + } + } + | cdf_bool_expr { $$ = $1(@1) == 0? false : true; } + | FEATURE DEFINED { + const auto& feature($1); + $$ = (feature == int(feature & cbl_gcobol_features)); + dbgmsg("CDF: feature 0x%02x is %s", $1, $$? "ON" : "OFF"); + } + ; + + /* + * "Abbreviated combined relation conditions + * shall not be specified." + */ +cdf_bool_expr: cdf_bool_expr OR cdf_and { $$ = cdfval_t($1(@1) || $3(@3)); } + | cdf_and + ; + +cdf_and: cdf_and AND cdf_reloper { $$ = cdfval_t($1(@1) && $3(@3)); } + | cdf_reloper + ; + +cdf_reloper: cdf_relexpr + | NOT cdf_relexpr { $$ = cdfval_t($2.number? 1 : 0); } + ; + +cdf_relexpr: cdf_relexpr '<' cdf_expr { $$ = $1(@1) < $3(@3); } + | cdf_relexpr LE cdf_expr { $$ = $1(@1) <= $3(@3); } + | cdf_relexpr '=' cdf_expr { + $$ = cdfval_t(false); + if( ( $1.string && $3.string) || + (!$1.string && !$3.string) ) + { + $$ = $1 == $3; + } else { + const char *msg = $1.string? + "incommensurate comparison is FALSE: '%s' = %ld" : + "incommensurate comparison is FALSE: %ld = '%s'" ; + error_msg(@1, msg); + } + } + | cdf_relexpr NE cdf_expr + { + $$ = cdfval_t(false); + if( ( $1.string && $3.string) || + (!$1.string && !$3.string) ) + { + $$ = $1 != $3; + } else { + const char *msg = $1.string? + "incommensurate comparison is FALSE: '%s' = %ld" : + "incommensurate comparison is FALSE: %ld = '%s'" ; + error_msg(@1, msg); + } + } + | cdf_relexpr GE cdf_expr { $$ = $1(@1) >= $3(@3); } + | cdf_relexpr '>' cdf_expr { $$ = $1(@1) > $3(@3); } + | cdf_expr + ; + +cdf_expr: cdf_expr '+' cdf_expr { $$ = $1(@1) + $3(@3); } + | cdf_expr '-' cdf_expr { $$ = $1(@1) - $3(@3); } + | cdf_expr '*' cdf_expr { $$ = $1(@1) * $3(@3); } + | cdf_expr '/' cdf_expr { $$ = $1(@1) / $3(@3); } + | '+' cdf_expr %prec NEG { $$ = $2(@2); } + | '-' cdf_expr %prec NEG { $$ = negate($2(@2)); } + | '(' cdf_bool_expr ')' { $$ = $2(@2); } + | cdf_factor + ; + +cdf_factor: NAME { + auto that = dictionary.find($1); + if( that != dictionary.end() ) { + $$ = that->second; + } else { + if( ! scanner_parsing() ) { + yywarn("CDF skipping: no such variable '%s' (ignored)", $1); + } else { + error_msg(@NAME, "CDF error: no such variable '%s'", $1); + } + $$ = cdfval_t(); + } + } + | NUMBER { $$ = cdfval_t($1); } + | LITERAL { $$ = cdfval_t($1); } + | NUMSTR { + auto value = integer_literal($NUMSTR); + if( !value.second ) { + error_msg(@1, "CDF error: parsed %s as %ld", + $NUMSTR, value.first); + YYERROR; + } + $$ = cdfval_t(value.first); + } + ; + +copy: copy_impl + ; +copy_impl: copybook_name suppress REPLACING replace_bys + | copybook_name suppress + ; +copybook_name: COPY name_one[src] + { + if( -1 == copybook.open(@src, $src.string) ) { + error_msg(@src, "could not open copybook file " + "for '%s'", $src.string); + YYERROR; + } + } + | COPY name_one[src] IN name_one[lib] + { + copybook.library(@lib, $lib.string); + if( -1 == copybook.open(@src, $src.string) ) { + error_msg(@src, "could not open copybook file " + "for '%s' in '%'s'", $src.string, $lib.string); + YYERROR; + } + } + ; + +replace_bys: replace_by + | replace_bys replace_by + ; + +replace_by: name_any[a] BY name_any[b] + { + bool add_whitespace = false; + replace_type_t type = {}; + switch($a.token) { + case YDF_NUMSTR: + case YDF_LITERAL: + type = string_e; + break; + case YDF_NAME: + type = token_e; + break; + case YDF_PSEUDOTEXT: + type = pseudo_e; + add_whitespace = $b.token != YDF_PSEUDOTEXT; + break; + default: + cbl_err("%s:%d: logic error on token %s", + __FILE__, __LINE__, keyword_str($a.token)); + break; + } + char *replacement = const_cast($b.string); + if( add_whitespace ) { + char *s = xasprintf(" %s ", replacement); + free(replacement); + replacement = s; + } + copybook.replacement( type, $a.string, replacement ); + } + ; + +suppress: %empty + | SUPPRESS + { + copybook.suppress(); + } + ; + +name_any: namelit + | PSEUDOTEXT { $$ = (cdf_arg_t){YDF_PSEUDOTEXT, $1}; } + ; + +name_one: NAME + { + cdf_arg_t arg = { YDF_NAME, $1 }; + auto p = dictionary.find($1); + + if( p != dictionary.end() ) { + arg.string = p->second.string; + } + $$ = arg; + } + | NUMSTR { $$ = (cdf_arg_t){YDF_NUMSTR, $1}; } + | LITERAL { $$ = (cdf_arg_t){YDF_LITERAL, $1}; } + ; + +namelit: name + { + cdf_arg_t arg = { YDF_NAME, $1 }; + auto p = dictionary.find($1); + + if( p != dictionary.end() ) { + arg.string = p->second.string; + } + $$ = arg; + } + | name subscripts + { + char *s = xasprintf( "%s%s", $1, $2 ); + free(const_cast($1)); + free(const_cast($2)); + + cdf_arg_t arg = { YDF_NAME, s }; + $$ = arg; + } + | NUMSTR { $$ = (cdf_arg_t){YDF_NUMSTR, $1}; } + | LITERAL { $$ = (cdf_arg_t){YDF_LITERAL, $1}; } + ; + +name: NAME + | name inof NAME + { + char *s = xasprintf( "%s %s %s", $1, $2, $3 ); + assert($$ == $1); + free(const_cast($1)); + free(const_cast($3)); + $$ = s; + } + ; +inof: IN { static const char in[] = "IN"; $$ = in; } + | OF { static const char of[] = "OF"; $$ = of; } + ; + +subscripts: subscript + | subscripts subscript + { + char *s = xasprintf("%s%s", $1, $2 ); + if( $$ != $1 ) free(const_cast($$)); + free(const_cast($1)); + free(const_cast($2)); + $$ = s; + } + ; +subscript: SUBSCRIPT + | LSUB subscript RSUB + { + char *s = xasprintf( "%s%s%s", $1, $2, $3 ); + free(const_cast($1)); + free(const_cast($2)); + free(const_cast($3)); + $$ = s; + } + ; + +as: %empty + | AS + ; + +on: %empty + | ON + ; + +with: %empty + | WITH + ; + +%% + +static YYLTYPE cdf_location; + +static YYLTYPE +location_set( const YYLTYPE& loc ) { + return cdf_location = loc; +} + +bool // used by cobol1.cc +defined_cmd( const char arg[] ) +{ + cdfval_t value(1); + + char *name = xstrdup(arg); + char *p = strchr(name, '='); + if(p) { + *p++ = '\0'; + int pos, number; + if( 1 == sscanf(p, "%d%n", &number, &pos) && size_t(pos) == strlen(p) ) { + value = cdfval_t(number); + } else { + value = cdfval_t(p); // it's a string + } + } + + dictionary[name] = value; + + auto cdf_name = dictionary.find(name); + assert(cdf_name != dictionary.end()); + assert(cdf_name->second.is_numeric() || cdf_name->second.string != NULL); + + if( yydebug ) { + if( cdf_name->second.is_numeric() ) { + dbgmsg("%s: added -D %s = %ld", __func__, name, cdf_name->second.as_number()); + } else { + dbgmsg("%s: added -D %s = \"%s\"", __func__, name, cdf_name->second.string); + } + } + return true; +} + +bool operator==( const cdfval_base_t& lhs, int rhs ) { + gcc_assert( !lhs.string ); + return lhs.number == rhs; +} + +bool operator||( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return lhs.number || rhs.number; +} + +bool operator&&( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return lhs.number && rhs.number; +} + +cdfval_t operator<( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number < rhs.number); +} + +cdfval_t operator<=( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number <= rhs.number); +} + +cdfval_t operator==( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + if( lhs.string && rhs.string ) { + return cdfval_t(0 == strcasecmp(lhs.string, rhs.string)); + } + if( !lhs.string && !rhs.string ) { + return cdfval_t(lhs.number == rhs.number); + } + cbl_internal_error("incommensurate operands"); + return false; +} + +cdfval_t operator!=( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + if( lhs.string && rhs.string ) { + return cdfval_t(0 != strcasecmp(lhs.string, rhs.string)); + } + if( !lhs.string && !rhs.string ) { + return cdfval_t(lhs.number != rhs.number); + } + cbl_internal_error("incommensurate operands"); + return false; +} + +cdfval_t operator>=( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number >= rhs.number); +} + +cdfval_t operator>( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number > rhs.number); +} + +cdfval_t operator+( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number + rhs.number); +} + +cdfval_t operator-( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number - rhs.number); +} + +cdfval_t operator*( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number * rhs.number); +} + +cdfval_t operator/( const cdfval_base_t& lhs, const cdfval_base_t& rhs ) { + gcc_assert( !lhs.string && !rhs.string ); + return cdfval_t(lhs.number / rhs.number); +} + +cdfval_t negate( cdfval_base_t lhs ) { + gcc_assert( !lhs.string ); + lhs.number = -lhs.number; + return lhs; +} + +#undef yylex +int yylex(void); + +static int ydflex(void) { + return yylex(); +} + +bool +cdf_value( const char name[], cdfval_t value ) { + auto p = dictionary.find(name); + + if( p != dictionary.end() ) return false; + + dictionary[name] = value; + return true; +} + +const cdfval_t * +cdf_value( const char name[] ) { + auto p = dictionary.find(name); + + if( p == dictionary.end() ) return NULL; + + return &p->second; +} + +static bool +verify_integer( const YDFLTYPE& loc, const cdfval_base_t& val ) { + if( val.string ) { + error_msg(loc, "'%s' is not an integer", val.string); + return false; + } + return true; +} + +cdfval_base_t& +cdfval_base_t::operator()( const YDFLTYPE& loc ) { + static cdfval_t zero(0); + return verify_integer(loc, *this) ? *this : zero; +} diff --git a/gcc/cobol/cdfval.h b/gcc/cobol/cdfval.h new file mode 100644 index 00000000000..1453f2af5f8 --- /dev/null +++ b/gcc/cobol/cdfval.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _CDF_VAL_H_ +#define _CDF_VAL_H_ + +#include +#include +#include + +bool scanner_parsing(); + +struct YDFLTYPE; +struct cdfval_base_t { + bool off; + const char *string; + int64_t number; + cdfval_base_t& operator()( const YDFLTYPE& loc ); +}; + +struct cdf_arg_t { + int token; + const char *string; +}; + +extern int yylineno; +const char * cobol_filename(); + +struct cdfval_t : public cdfval_base_t { + int lineno; + const char *filename; + + cdfval_t() + : lineno(yylineno), filename(cobol_filename()) + { + cdfval_base_t::off = false; + cdfval_base_t::string = NULL; + cdfval_base_t::number = 0; + } + cdfval_t( const char value[] ) + : lineno(yylineno), filename(cobol_filename()) + { + cdfval_base_t::off = false; + cdfval_base_t::string = value; + cdfval_base_t::number = 0; + } + cdfval_t( long long value ) + : lineno(yylineno), filename(cobol_filename()) + { + cdfval_base_t::off = false; + cdfval_base_t::string = NULL; + cdfval_base_t::number = value; + } + cdfval_t( int64_t value ) + : lineno(yylineno), filename(cobol_filename()) + { + cdfval_base_t::off = false; + cdfval_base_t::string = NULL; + cdfval_base_t::number = value; + } + cdfval_t( int value ) + : lineno(yylineno), filename(cobol_filename()) + { + cdfval_base_t::off = false; + cdfval_base_t::string = NULL; + cdfval_base_t::number = value; + } + cdfval_t( const cdfval_base_t& value ) + : lineno(yylineno), filename(cobol_filename()) + { + cdfval_base_t *self(this); + *self = value; + } + + bool is_numeric() const { return ! (off || string); } + int64_t as_number() const { assert(is_numeric()); return number; } +}; + +bool +cdf_value( const char name[], cdfval_t value ); + +const cdfval_t * +cdf_value( const char name[] ); + +#endif diff --git a/gcc/cobol/cobol-system.h b/gcc/cobol/cobol-system.h new file mode 100644 index 00000000000..81529bd3a67 --- /dev/null +++ b/gcc/cobol/cobol-system.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef COBOL_SYSTEM_H +#define COBOL_SYSTEM_H + +// The following "local" #include is part of the GCC core code +#include "config.h" + +/* Define this so that inttypes.h defines the PRI?64 macros even + when compiling with a C++ compiler. Define it here so in the + event inttypes.h gets pulled in by another header it is already + defined. */ +#define __STDC_FORMAT_MACROS + +// These must be included before the #poison declarations in system.h. + +#define INCLUDE_STRING +#define INCLUDE_VECTOR +#define INCLUDE_MAP +#define INCLUDE_SET +#define INCLUDE_LIST +#define INCLUDE_ALGORITHM + +#include +#include +#include +#include +#include +#include + +#include +#include + +// The following "local" #include is part of the GCC core code +#include "system.h" + +#endif diff --git a/gcc/cobol/cobol1.cc b/gcc/cobol/cobol1.cc new file mode 100644 index 00000000000..c2e68edea25 --- /dev/null +++ b/gcc/cobol/cobol1.cc @@ -0,0 +1,692 @@ +/* gcobol backend interface + Copyright (C) 2021-2025 Free Software Foundation, Inc. + Contributed by Robert J. Dubner and James K. Lowden + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +#include "cobol-system.h" +#include "coretypes.h" +#include "tree.h" +#include "diagnostic.h" +#include "opts.h" +#include "debug.h" +#include "langhooks.h" +#include "langhooks-def.h" +#include "target.h" +#include "stringpool.h" +#define HOWEVER_GCC_DEFINES_TREE 1 +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" +#include "exceptl.h" +#include "exceptg.h" +#include "util.h" +#include "gengen.h" // This has some GTY(()) markers +#include "structs.h" // This has some GTY(()) markers + +/* Required language-dependent contents of a type. + + Without it, we get + + gt-cobol-cobol1.h:858: undefined reference to `gt_pch_nx_lang_type(void *) + + */ + +struct GTY (()) lang_type + { + char dummy; + }; + +/* Language-dependent contents of a decl. + Without it, we get + + gt-cobol-cobol1.h:674: more undefined references to `gt_pch_nx_lang_decl + + */ + +struct GTY (()) lang_decl + { + char dummy; + }; + +/* + * Language-dependent contents of an identifier. + * This must include a tree_identifier. + */ +struct GTY (()) lang_identifier + { + struct tree_identifier common; + }; + +/* The resulting tree type. */ + +union GTY ((desc ("TREE_CODE (&%h.generic) == IDENTIFIER_NODE"), + chain_next ("CODE_CONTAINS_STRUCT (TREE_CODE (&%h.generic), " + "TS_COMMON) ? ((union lang_tree_node *) TREE_CHAIN " + "(&%h.generic)) : NULL"))) lang_tree_node + { + union tree_node GTY ((tag ("0"), desc ("tree_node_structure (&%h)"))) generic; + struct lang_identifier GTY ((tag ("1"))) identifier; + }; + +/* We don't use language_function. + + But without the placeholder: + + /usr/bin/ld: gtype-desc.o: in function `gt_ggc_mx_function(void*)': + ../build/gcc/gtype-desc.cc:1763: undefined reference to `gt_ggc_mx_language_function(void*)' + /usr/bin/ld: gtype-desc.o: in function `gt_pch_nx_function(void*)': + ../build/gcc/gtype-desc.cc:5727: undefined reference to `gt_pch_nx_language_function(void*)' + + */ + +struct GTY (()) language_function + { + int dummy; + }; + +/* + * Language hooks. + */ + +#define ATTR_NULL 0 +#define ATTR_LEAF_LIST (ECF_LEAF) +#define ATTR_NOTHROW_LEAF_LIST (ECF_NOTHROW | ECF_LEAF) +#define ATTR_NOTHROW_LEAF_MALLOC_LIST (ECF_NOTHROW | ECF_LEAF | ECF_MALLOC) +#define ATTR_CONST_NOTHROW_LEAF_LIST (ECF_NOTHROW | ECF_LEAF | ECF_CONST) +#define ATTR_PURE_NOTHROW_LEAF_LIST (ECF_NOTHROW | ECF_LEAF | ECF_PURE) +#define ATTR_NOTHROW_LIST (ECF_NOTHROW) +#define ATTR_CONST_NOTHROW_LIST (ECF_NOTHROW | ECF_CONST) +#define ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LIST \ + (ECF_NOTHROW | ECF_LEAF | ECF_MALLOC) +#define ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST \ + (ECF_NOTHROW | ECF_LEAF) +#define ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST \ + (ECF_COLD | ECF_NORETURN | \ + ECF_NOTHROW | ECF_LEAF) +#define ATTR_PURE_NOTHROW_NONNULL_LEAF (ECF_PURE|ECF_NOTHROW|ECF_LEAF) +#define ATTR_MALLOC_WARN_UNUSED_RESULT_NOTHROW_NONNULL_LEAF (ECF_MALLOC|ECF_NOTHROW|ECF_LEAF) +#define ATTR_TMPURE_NORETURN_NOTHROW_LEAF_COLD_LIST (ECF_TM_PURE|ECF_NORETURN|ECF_NOTHROW|ECF_LEAF|ECF_COLD) +#define ATTR_NORETURN_NOTHROW_LIST (ECF_NORETURN|ECF_NOTHROW) +#define ATTR_NOTHROW_NONNULL_LEAF (ECF_NOTHROW|ECF_LEAF) + +static void +gfc_define_builtin (const char *name, tree type, enum built_in_function code, + const char *library_name, int attr) +{ + tree decl; + + decl = add_builtin_function (name, type, code, BUILT_IN_NORMAL, + library_name, NULL_TREE); + set_call_expr_flags (decl, attr); + + set_builtin_decl (code, decl, true); +} + +static void +create_our_type_nodes_init() + { + for(int i=0; i<256; i++) + { + char_nodes[i] = build_int_cst_type(CHAR, i); + } + + // Create some useful constants to avoid cluttering up the code + // build_int_cst_type() calls + pvoid_type_node = build_pointer_type(void_type_node); + integer_minusone_node = build_int_cst_type(INT, -1); + integer_two_node = build_int_cst_type(INT, 2); + integer_eight_node = build_int_cst_type(INT, 8); + size_t_zero_node = build_int_cst_type(SIZE_T, 0); + int128_zero_node = build_int_cst_type(INT128, 0); + int128_five_node = build_int_cst_type(INT128, 5); + int128_ten_node = build_int_cst_type(INT128, 10); + char_ptr_type_node = build_pointer_type(CHAR); + uchar_ptr_type_node = build_pointer_type(UCHAR); + wchar_ptr_type_node = build_pointer_type(WCHAR); + long_double_ten_node = build_real_from_int_cst( + LONGDOUBLE, + build_int_cst_type(INT,10)); + sizeof_size_t = build_int_cst_type(SIZE_T, sizeof(size_t)); + sizeof_pointer = build_int_cst_type(SIZE_T, sizeof(void *)); + + bool_true_node = build2(EQ_EXPR, + integer_type_node, + integer_one_node, + integer_one_node); + + bool_false_node = build2( EQ_EXPR, + integer_type_node, + integer_one_node, + integer_zero_node); + } + + +static bool +cobol_langhook_init (void) + { + build_common_tree_nodes (true); + + create_our_type_nodes_init(); + + tree char_pointer_type_node = build_pointer_type (char_type_node); + tree const_char_pointer_type_node + = build_pointer_type (build_type_variant (char_pointer_type_node, 1, 0)); + + tree ftype; + + ftype = build_function_type_list (pvoid_type_node, + size_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_malloc", + ftype, + BUILT_IN_MALLOC, + "malloc", + ATTR_NOTHROW_LEAF_MALLOC_LIST); + + ftype = build_function_type_list (pvoid_type_node, pvoid_type_node, + size_type_node, NULL_TREE); + gfc_define_builtin ("__builtin_realloc", ftype, BUILT_IN_REALLOC, + "realloc", ATTR_NOTHROW_LEAF_LIST); + + ftype = build_function_type_list (void_type_node, + pvoid_type_node, NULL_TREE); + gfc_define_builtin ("__builtin_free", ftype, BUILT_IN_FREE, + "free", ATTR_NOTHROW_LEAF_LIST); + + ftype = build_function_type_list (pvoid_type_node, + const_ptr_type_node, + integer_type_node, + size_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_memchr", ftype, BUILT_IN_MEMCHR, + "memchr", ATTR_PURE_NOTHROW_NONNULL_LEAF); + + + ftype = build_function_type_list (size_type_node, + const_char_pointer_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_strlen", ftype, BUILT_IN_STRLEN, + "strlen", ATTR_PURE_NOTHROW_NONNULL_LEAF); + + + ftype = build_function_type_list (char_pointer_type_node, + const_char_pointer_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_strdup", ftype, BUILT_IN_STRDUP, + "strdup", ATTR_MALLOC_WARN_UNUSED_RESULT_NOTHROW_NONNULL_LEAF); + + ftype = build_function_type_list (void_type_node, NULL_TREE); + gfc_define_builtin ("__builtin_abort", ftype, BUILT_IN_ABORT, + "abort", ATTR_TMPURE_NORETURN_NOTHROW_LEAF_COLD_LIST); + + ftype = build_function_type_list (void_type_node, + integer_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_exit", ftype, BUILT_IN_EXIT, + "exit", ATTR_TMPURE_NORETURN_NOTHROW_LEAF_COLD_LIST); + + ftype = build_function_type_list (integer_type_node, + const_char_pointer_type_node, + const_char_pointer_type_node, + size_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_strncmp", ftype, BUILT_IN_STRNCMP, + "strncmp", ATTR_PURE_NOTHROW_NONNULL_LEAF); + + ftype = build_function_type_list (integer_type_node, + const_char_pointer_type_node, + const_char_pointer_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_strcmp", ftype, BUILT_IN_STRCMP, + "strcmp", ATTR_PURE_NOTHROW_NONNULL_LEAF); + + ftype = build_function_type_list (char_pointer_type_node, + char_pointer_type_node, + const_char_pointer_type_node, + NULL_TREE); + gfc_define_builtin ("__builtin_strcpy", ftype, BUILT_IN_STRCPY, + "strcpy", ATTR_NOTHROW_NONNULL_LEAF); + + build_common_builtin_nodes (); + + // Make sure this is a supported configuration. + if( !targetm.scalar_mode_supported_p (TImode) || !float128_type_node ) + { + sorry ("COBOL requires a 64-bit configuration"); + } + + return true; + } + + +void cobol_set_debugging( bool flex, bool yacc, bool parser ); +void cobol_set_indicator_column( int column ); +void copybook_directory_add( const char gcob_copybook[] ); +void copybook_extension_add( const char ext[] ); +bool defined_cmd( const char arg[] ); +void lexer_echo( bool tf ); + +static void +cobol_langhook_init_options_struct (struct gcc_options *opts) { + opts->x_yy_flex_debug = 0; + opts->x_yy_debug = 0; + opts->x_cobol_trace_debug = 0; + + cobol_set_debugging( false, false, false ); + + copybook_directory_add( getenv("GCOB_COPYBOOK") ); +} + +static unsigned int +cobol_option_lang_mask (void) { + return CL_Cobol; +} + +bool use_static_call( bool yn ); +void add_cobol_exception( ec_type_t type, bool ); + +bool include_file_add(const char input[]); +bool preprocess_filter_add( const char filter[] ); + +bool max_errors_exceeded( int nerr ) { + return flag_max_errors != 0 && flag_max_errors <= nerr; +} + +static void +enable_exceptions( bool enable ) { + for( char * name = xstrdup(cobol_exceptions); + NULL != (name = strtok(name, ",")); name = NULL ) { + ec_type_t type = ec_type_of(name); + if( type == ec_none_e ) { + yywarn("unrecognized exception '%s' was ignored", name); + continue; + } + ec_disposition_t disposition = ec_type_disposition(type); + if( disposition != ec_implemented(disposition) ) { + cbl_unimplemented("exception '%s'", name); + } + add_cobol_exception(type, enable ); + } +} + +static bool +cobol_langhook_handle_option (size_t scode, + const char *arg ATTRIBUTE_UNUSED, + HOST_WIDE_INT value, + int kind ATTRIBUTE_UNUSED, + location_t loc ATTRIBUTE_UNUSED, + const struct + cl_option_handlers *handlers ATTRIBUTE_UNUSED) + { + // process_command (decoded_options_count, decoded_options); + enum opt_code code = (enum opt_code) scode; + + switch(code) + { + case OPT_D: + defined_cmd(arg); + return true; + case OPT_E: + lexer_echo(true); + return true; + + case OPT_I: + copybook_directory_add(arg); + return true; + case OPT_copyext: + copybook_extension_add(cobol_copyext); + return true; + + case OPT_fstatic_call: + use_static_call( arg? true : false ); + return true; + + case OPT_fdefaultbyte: + wsclear(cobol_default_byte); + return true; + + case OPT_fflex_debug: + yy_flex_debug = 1; + cobol_set_debugging( true, yy_debug == 1, cobol_trace_debug == 1 ); + return true; + case OPT_fyacc_debug: + yy_debug = 1; + cobol_set_debugging(yy_flex_debug == 1, + true, + cobol_trace_debug == 1 ); + return true; + case OPT_ftrace_debug: + cobol_set_debugging( yy_flex_debug == 1, yy_debug == 1, true ); + return true; + + case OPT_fcobol_exceptions: { + if( cobol_exceptions[0] == '=' ) cobol_exceptions++; + enable_exceptions(value == 1); + return true; + } + + case OPT_fmax_errors: + flag_max_errors = atoi(arg); + return true; + + case OPT_ffixed_form: + cobol_set_indicator_column(-7); + return true; + case OPT_ffree_form: + cobol_set_indicator_column(0); + return true; + + case OPT_findicator_column: + cobol_set_indicator_column( indicator_column ); + return true; + + case OPT_dialect: + cobol_dialect_set(cbl_dialect_t(cobol_dialect)); + return true; + + case OPT_fsyntax_only: + mode_syntax_only(identification_div_e); + break; + case OPT_preprocess: + if( ! preprocess_filter_add(arg) ) { + cbl_errx( "could not execute preprocessor %s", arg); + } + return true; + case OPT_include: + if( ! include_file_add(cobol_include) ) { + cbl_errx( "could not include %s", cobol_include); + } + return true; + + case OPT_main: + // This isn't right. All OPT_main should be replaced + error("We should never see a non-equal dash-main in cobol1.c"); + exit(1); + return true; + + case OPT_main_: + register_main_switch(cobol_main_string); + return true; + + case OPT_nomain: + return true; + + case OPT_finternal_ebcdic: + cobol_gcobol_feature_set(feature_internal_ebcdic_e); + return true; + + default: + break; + } + + Cobol_handle_option_auto (&global_options, &global_options_set, + scode, arg, value, + cobol_option_lang_mask (), kind, + loc, handlers, global_dc); + + return true; + } + +void +cobol_parse_files (int nfile, const char **files); + +static void +cobol_langhook_parse_file (void) + { + cobol_parse_files (num_in_fnames, in_fnames); + } + +static tree +cobol_langhook_type_for_mode (enum machine_mode mode, int unsignedp) + { + if (mode == TYPE_MODE (float_type_node)) + return float_type_node; + + if (mode == TYPE_MODE (double_type_node)) + return double_type_node; + + if (mode == TYPE_MODE (float32_type_node)) + return float32_type_node; + + if (mode == TYPE_MODE (float64_type_node)) + return float64_type_node; + + if (mode == TYPE_MODE (float128_type_node)) + return float128_type_node; + + if (mode == TYPE_MODE (intQI_type_node)) + return unsignedp ? unsigned_intQI_type_node : intQI_type_node; + if (mode == TYPE_MODE (intHI_type_node)) + return unsignedp ? unsigned_intHI_type_node : intHI_type_node; + if (mode == TYPE_MODE (intSI_type_node)) + return unsignedp ? unsigned_intSI_type_node : intSI_type_node; + if (mode == TYPE_MODE (intDI_type_node)) + return unsignedp ? unsigned_intDI_type_node : intDI_type_node; + if (mode == TYPE_MODE (intTI_type_node)) + return unsignedp ? unsigned_intTI_type_node : intTI_type_node; + + if (mode == TYPE_MODE (integer_type_node)) + return unsignedp ? unsigned_type_node : integer_type_node; + + if (mode == TYPE_MODE (long_integer_type_node)) + return unsignedp ? long_unsigned_type_node : long_integer_type_node; + + if (mode == TYPE_MODE (long_long_integer_type_node)) + return unsignedp ? long_long_unsigned_type_node + : long_long_integer_type_node; + + if (COMPLEX_MODE_P (mode)) + { + if (mode == TYPE_MODE (complex_float_type_node)) + return complex_float_type_node; + if (mode == TYPE_MODE (complex_double_type_node)) + return complex_double_type_node; + if (mode == TYPE_MODE (complex_long_double_type_node)) + return complex_long_double_type_node; + if (mode == TYPE_MODE (complex_integer_type_node) && !unsignedp) + return complex_integer_type_node; + } + + return NULL; + } + +////static tree +////cobol_langhook_type_for_size (unsigned int bits ATTRIBUTE_UNUSED, +//// int unsignedp ATTRIBUTE_UNUSED) +//// { +//// gcc_unreachable (); +//// return NULL; +//// } + +/* Record a builtin function. We just ignore builtin functions. */ + +static tree +cobol_langhook_builtin_function (tree decl) + { + return decl; + } + +static bool +cobol_langhook_global_bindings_p (void) + { + return false; + } + +static tree +cobol_langhook_pushdecl (tree decl ATTRIBUTE_UNUSED) + { + // This function is necessary, but is apparently never being called + gcc_unreachable (); + } + +static tree +cobol_langhook_getdecls (void) + { + return NULL; + } + +char * +cobol_name_mangler(const char *cobol_name_) + { + // The caller should free the returned string. + + // This is a solution to the problem of hyphens and the fact that COBOL + // names can start with digits. + // + // COBOL names can't start with underscore; GNU assembler names can. + // Assembler names can't start with a digit 0-9; COBOL names can. + // + // We convert all COBOL names to lowercase, so uppercase characters aren't + // seen. + // + // COBOL names can have hyphens; assembler names can't. + // + // So if a name starts with a digit, we prepend an underscore. + // We convert the whole name to lowercase. + // We replace hyphens with '$' + // + + if( !cobol_name_ ) + { + return nullptr; + } + + // Allocate enough space for a prepended underscore and a final '\0' + char *cobol_name = (char *)xmalloc(strlen(cobol_name_)+2); + size_t n = 0; + if( cobol_name_[0] >= '0' && cobol_name_[0] <= '9' ) + { + // The name starts with 0-9, so we are going to lead it + // with an underscore + cobol_name[n++] = '_'; + } + for(size_t i=0; i. + +# Configure looks for the existence of this file to auto-config each language. +# We define several parameters used by configure: +# +# language - name of language as it would appear in $(LANGUAGES) +# compilers - value to add to $(COMPILERS) +# diff_excludes - files to ignore when building diffs between two versions. + +language="cobol" + +compilers="cobol1\$(exeext)" + +target_libs="target-libgcobol" + +# Files that should be scanned by gengtype.c to generate the garbage +# collection tables. + +gtfiles="\$(srcdir)/cobol/cobol1.cc" + +# Do not build by default +build_by_default="no" diff --git a/gcc/cobol/convert.cc b/gcc/cobol/convert.cc new file mode 100644 index 00000000000..a0ef9d5388d --- /dev/null +++ b/gcc/cobol/convert.cc @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cobol-system.h" + +#include "coretypes.h" +#include "tree.h" +#include "fold-const.h" +#include "convert.h" + +// This is required by some generic routines + +tree +convert (tree /*type*/, tree /*expr*/) +{ +// The routine is necessary, but in our testing of the GCOBOL compiler, it never +// is called. I am commenting this cloned code out. I am keeping it so I have +// something to refer to if and when the necessity to reconstitute it arises. +// RJ Dubner, 2025-02-17 +#if 0 + if (type == error_mark_node + || expr == error_mark_node + || TREE_TYPE (expr) == error_mark_node) + return error_mark_node; + + if (type == TREE_TYPE (expr)) + return expr; + + if (TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (TREE_TYPE (expr))) + return fold_convert (type, expr); + + switch (TREE_CODE (type)) + { + case VOID_TYPE: + case BOOLEAN_TYPE: + return fold_convert (type, expr); + case INTEGER_TYPE: + return fold (convert_to_integer (type, expr)); + case POINTER_TYPE: + return fold (convert_to_pointer (type, expr)); + case REAL_TYPE: + return fold (convert_to_real (type, expr)); + case COMPLEX_TYPE: + return fold (convert_to_complex (type, expr)); + default: + break; + } +#endif + + gcc_unreachable (); +} diff --git a/gcc/cobol/copybook.h b/gcc/cobol/copybook.h new file mode 100644 index 00000000000..3e2cf9d934e --- /dev/null +++ b/gcc/cobol/copybook.h @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _COPYBOOK_H +#pragma message __FILE__ " included twice" +#else +#define _COPYBOOK_H + +FILE * copy_mode_start(); + +const char * cobol_filename(); +bool cobol_filename( const char *name, ino_t inode ); + +void scanner_parsing( int token, bool tf ); +void scanner_parsing_toggle(); +void scanner_parsing_pop(); + +/* + * COPY support On encountering a COPY statement, the parser continues + * to parse, collecting the replacement values, if any. At statement + * end (at the period), the system rearranges input to apply the + * replacements before the input text is read by the lexer. + */ + +enum replace_type_t { string_e, token_e, pseudo_e }; + +struct copybook_replace_t { + replace_type_t type; + const char *src, *tgt; +}; +class copybook_t; + +class copybook_elem_t { + friend copybook_t; + struct copybook_loc_t { + YYLTYPE loc; + const char *name; + copybook_loc_t() : name(NULL) {} + } source, library; + bool suppress; + static const char *extensions; + public: + struct { bool source, library; } literally; + int fd; + size_t nsubexpr; + std::deque replacements; + + copybook_elem_t() + : suppress(false) + , fd(-1) + , nsubexpr(0) + , regex_text(NULL) + { + literally = {}; + } + + void clear() { + suppress = false; + nsubexpr = 0; + if( fd ) close(fd); + fd = -1; + // TODO: free src & tgt + replacements.clear(); + } + + int open_file( const char dir[], bool literally = false ); + void extensions_add( const char ext[], const char alt[] ); + + static inline bool is_quote( const char ch ) { + return ch == '\'' || ch == '"'; + } + static inline bool quoted( const char name[] ) { + gcc_assert(name); + return is_quote(name[0]); + } + static char * dequote( const char orig[] ) { + gcc_assert(quoted(orig)); + auto name = (char*)xcalloc(1, strlen(orig)); + gcc_assert(name); + char *tgt = name; + + // For a literal name, we de-quote it and try to open it in the + // current working directory. The COBOL literal could include + // (escaped) doubled quotes, which we reduce to one. + for( const char *src = orig; src < orig + strlen(orig); ) { + if( is_quote(src[0]) ) { + if( src[0] == src[1] ) { + *tgt++ = *src++; // copy one of doubled quote + } + src++; // skip quote + continue; + } + *tgt++ = *src++; + } + *tgt = '\0'; + + return name; + } + +private: + char *regex_text; +}; + +class copybook_t { + std::list directories; + copybook_elem_t book; + + // Take copybook name from the environment, if defined, else use it verbatim. + static const char * transform_name( const char name[] ) { + char uname[ strlen(name) ]; + const char *value = getenv(name); + if( !value ) { + auto ename = name + strlen(name); + std::transform( name, ename, uname, + []( char ch ) { return TOUPPER(ch); } ); + value = getenv(uname); // try uppercase of envar name + if( !value ) value = name; // keep original unmodified + } + if( false && value != uname ) { + dbgmsg("using copybook file '%s' from environment variable '%s'", + value, name); + } + return xstrdup(value); + } + + public: + copybook_t() { directories.push_back(NULL); } + + void suppress( bool tf = true ) { book.suppress = tf; } + bool suppressed() { return book.suppress; } + void source( const YYLTYPE& loc, const char name[] ) { + book.source.loc = loc; + book.literally.source = copybook_elem_t::quoted(name); + book.source.name = book.literally.source? + copybook_elem_t::dequote(name) : transform_name(name); + } + void library( const YYLTYPE& loc, const char name[] ) { + book.library.loc = loc; + book.literally.library = copybook_elem_t::quoted(name); + book.library.name = book.literally.library? + copybook_elem_t::dequote(name) : transform_name(name); + } + void replacement( replace_type_t type, const char src[], const char tgt[] ) { + copybook_replace_t elem = { type, src, tgt }; + book.replacements.push_back(elem); + } + + copybook_elem_t *current() { return &book; } + const char *source() const { return book.source.name; } + const char *library() const { return book.library.name; } + + int open(YYLTYPE loc, const char name[]) { + int fd = -1; + book.clear(); + this->source(loc, name); + + for( auto dir : directories ) { + if( true ) { + dbgmsg("copybook_t::open '%s' OF '%s' %s", + book.source.name, + dir? dir: ".", + book.literally.source? ", literally" : "" ); + } + if( (fd = book.open_file(dir, book.literally.source)) != -1 ) break; + } + return fd; + } + + const char * directory_add( const char name[] ) { + directories.push_back(name); + return name; + } + void extensions_add( const char ext[], const char alt[] ); +}; + +extern copybook_t copybook; + +#endif diff --git a/gcc/cobol/dts.h b/gcc/cobol/dts.h new file mode 100644 index 00000000000..618f649c2e1 --- /dev/null +++ b/gcc/cobol/dts.h @@ -0,0 +1,109 @@ +/* + * Contributed to the public domain by James K. Lowden + * Tuesday October 17, 2023 + * + * This stand-in for std::regex was written because the implementation provided + * by the GCC libstdc++ in GCC 11 proved too slow, where "slow" means "appears + * not to terminate". Some invocations of std::regex_search took over 5 + * seconds (or minutes) and used over 1900 stack frames, and "never" returned. + * Because the same patterns and input presented no difficulty to the C standad + * library regex functions, I recast the C++ implementation in terms of + * regex(3). + * + * Unlike std::regex, this dts version supports only Posix EREs, and requires + * the input to be NUL-terminated. + * + * It is my hope and expectation to replace this implementation with the + * standard one when it is improved. + */ + +#include +#include + +#include + +namespace dts { + class csub_match : public regmatch_t { + const char *input; + public: + const char *first, *second; + bool matched; + + explicit csub_match( const char *input = NULL) + : input(input) + , first(NULL), second(NULL), matched(false) + { + static regmatch_t empty = { -1, -1 }; + regmatch_t& self(*this); + self = empty; + } + csub_match( const char input[], const regmatch_t& m ) + : input(input) + { + regmatch_t& self(*this); + self = m; + matched = rm_so != -1; + first = rm_so == -1? NULL : input + rm_so; + second = rm_eo == -1? NULL : input + rm_eo; + } + + int length() const { return rm_eo - rm_so; } + }; + + typedef std::vector cmatch; + + class regex : public regex_t { + size_t nsubexpr; + const char *pattern; + public: + enum cflag_t { extended = REG_EXTENDED, icase = REG_ICASE }; + + regex( const char pattern[], int flags ) : pattern(pattern) { + nsubexpr = 1 + std::count(pattern, pattern + strlen(pattern), '('); + int erc = regcomp(this, pattern, flags); + if( erc != 0 ) { + char msg[80]; + regerror(erc, this, msg, sizeof msg); +#if __cpp_exceptions + throw std::logic_error(msg); +#else + pattern = NULL; + cbl_errx("%s", msg); +#endif + } + } + ~regex() { regfree(this); } + + size_t size() const { return nsubexpr; } + bool ready() const { return pattern != NULL; } + private: + regex( const regex& ) {} + }; + + inline bool regex_search( const char input[], const char *eoinput, + cmatch& cm, regex& re ) { + if( eoinput != NULL && *eoinput != '\0' ) { +#if __cpp_exceptions + static const char msg[] = "input not NUL-terminated"; + throw std::domain_error( msg ); +#else + eoinput = strchr(input, '\0'); +#endif + } + if( eoinput == NULL ) eoinput = strchr(input, '\0'); + auto ncm = re.size(); + cm.resize(ncm); + regmatch_t cms[ncm]; + + + int erc = regexec( &re, input, ncm, cms, 0 ); + if( erc != 0 ) return false; + std::transform( cms, cms+ncm, cm.begin(), + [input]( const regmatch_t& m ) { + return csub_match( input, m ); + } ); + return true; + } +}; + + diff --git a/gcc/cobol/except.cc b/gcc/cobol/except.cc new file mode 100644 index 00000000000..859a76d6efb --- /dev/null +++ b/gcc/cobol/except.cc @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cobol-system.h" +#include "coretypes.h" +#include "tree.h" +#define HOWEVER_GCC_DEFINES_TREE 1 +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" +#include "gengen.h" +#include "exceptl.h" +#include "util.h" + +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + +static const ec_descr_t * +ec_type_descr( ec_type_t type ) { + auto p = std::find( __gg__exception_table, __gg__exception_table_end, type ); + if( p == __gg__exception_table_end ) { + cbl_internal_error("no such exception: 0x%04x", type); + } + return p; +} + +const char * +ec_type_str( ec_type_t type ) { + auto p = ec_type_descr(type); + return p->name; +} + +ec_disposition_t +ec_type_disposition( ec_type_t type ) { + auto p = ec_type_descr(type); + return p->disposition; +} + +static size_t +ec_level( ec_type_t ec ) { + if( ec == ec_all_e ) return 1; + if( 0 == (static_cast(ec) & ~EC_ALL_E) ) return 2; + return 3; +} + +cbl_enabled_exceptions_t enabled_exceptions; + +void +cbl_enabled_exceptions_t::dump() const { + if( empty() ) { + cbl_message(2, "cbl_enabled_exceptions_t: no exceptions" ); + return; + } + int i = 1; + for( auto& elem : *this ) { + cbl_message(2, "cbl_enabled_exceptions_t: %2d {%s, %s, %s, %zu}", + i++, + elem.enabled? " enabled" : "disabled", + elem.location? "location" : " none", + ec_type_str(elem.ec), + elem.file ); + } +} + + +bool +cbl_enabled_exceptions_t::turn_on_off( bool enabled, + bool location, + ec_type_t type, + std::set files ) +{ + // A Level 3 EC is added unilaterally; it can't knock out a lower level. + if( ec_level(type) == 3 ) { + if( files.empty() ) { + auto elem = cbl_enabled_exception_t(enabled, location, type); + apply(elem); + return true; + } + + for( size_t file : files ) { + auto elem = cbl_enabled_exception_t(enabled, location, type, file); + apply(elem); + } + return true; + } + + /* + * std::remove_if cannot be used with std::set because its elements are const. + * std::set::erase_if became available only in C++20. + */ + if( enabled ) { // remove any disabled + if( files.empty() ) { + auto p = begin(); + while( end() != (p = std::find_if( begin(), end(), + [ec = type]( const auto& elem ) { + return + !elem.enabled && + ec_cmp(ec, elem.ec); } )) ) { + erase(p); + } + } else { + for( size_t file: files ) { + auto p = begin(); + while( end() != (p = std::find_if( begin(), end(), + [ec = type, file]( const auto& elem ) { + return + !elem.enabled && + file == elem.file && + ec_cmp(ec, elem.ec); } )) ) { + erase(p); + } + } + } + auto elem = cbl_enabled_exception_t(enabled, location, type); + apply(elem); + return true; + } + assert(!enabled); + assert(ec_level(type) < 3); + + if( files.empty() ) { + if( type == ec_all_e ) { + clear(); + return true; + } + // Remove any matching Level-2 or Level-3 ECs, regardless of their files. + auto p = begin(); + while( end() != (p = std::find_if( begin(), end(), + [ec = type]( const auto& elem ) { + return + elem.enabled && + elem.ec != ec_all_e && + ec_cmp(ec, elem.ec); } )) ) { + erase(p); + } + // Keep the EC as an exception if a higher-level would othewise apply. + p = std::find_if( begin(), end(), + [ec = type]( const auto& elem ) { + return + elem.enabled && + (elem.ec == ec_all_e || elem.ec < ec) && + elem.file == 0 && + ec_cmp(ec, elem.ec); } ); + if( p != end() ) { + auto elem = cbl_enabled_exception_t(enabled, location, type); + apply(elem); + } + } else { + // Remove any matching or lower-level EC for the same file. + for( size_t file: files ) { + auto p = begin(); + while( end() != (p = std::find_if( begin(), end(), + [ec = type, file]( const auto& elem ) { + return + elem.enabled && + // ec is higher level and matches + (ec == ec_all_e || ec <= elem.ec) && + file == elem.file && + ec_cmp(ec, elem.ec); } )) ) { + erase(p); + } + // Keep the EC as an exception if a higher-level would othewise apply. + p = std::find_if( begin(), end(), + [ec = type, file]( const auto& elem ) { + return + elem.enabled && + (elem.ec == ec_all_e || elem.ec < ec) && + file == elem.file && + ec_cmp(ec, elem.ec); } ); + if( p != end() ) { + auto elem = cbl_enabled_exception_t(enabled, location, type, file); + apply(elem); + } + } + } + + return true; +} + +const cbl_enabled_exception_t * +cbl_enabled_exceptions_t::match( ec_type_t type, size_t file ) { + auto output = enabled_exception_match( begin(), end(), type, file ); + return output != end()? &*output : NULL; +} + +class choose_declarative { + size_t program; + public: + choose_declarative( size_t program ) : program(program) {} + + bool operator()( const cbl_declarative_t& dcl ) { + return dcl.global || program == symbol_at(dcl.section)->program; + } +}; + +bool +sort_supers_last( const cbl_declarative_t& a, const cbl_declarative_t& b ) { + if( symbol_at(a.section)->program == symbol_at(b.section)->program ) { + return a.section < b.section; + } + return symbol_at(a.section)->program > symbol_at(b.section)->program; +} + +cbl_field_t * new_temporary_decl(); + +/* + * For a program, create a "DECLARATIVES" entry in the symbol table, + * representing eligible declarative sections in priorty order: + * in-program first, followed by any global declaratives in parent + * programs. These decribe the USE criteria declared for each + * declarative section. + * + * The field's initial value is actually an array of + * cbl_declarartive_t, in which the first element is unused, except + * that array[0].section represents the number of elements, starting + * at array[1]. + * + * The returned value is the declarative's symbol index. It is passed + * to match_exception, which scans it for a declarative whose criteria + * match the raised exception. That function returns the + * cbl_declarative_t::section, which the program then uses to PERFORM + * that section. + */ +size_t +symbol_declaratives_add( size_t program, + const std::list& dcls ) +{ + auto n = dcls.size(); + if( n == 0 ) return 0; + + auto blob = new cbl_declarative_t[ 1 + n ]; + + auto pend = std::copy_if( dcls.begin(), dcls.end(), blob + 1, + choose_declarative(program) ); + + std::sort( blob + 1, pend, sort_supers_last ); + + // Overload blob[0].section to be the count. + blob[0].section = (pend - blob) - 1; + + size_t len = reinterpret_cast(pend) + - reinterpret_cast(blob); + assert(len == (blob[0].section + 1) * sizeof(blob[0])); + + // Construct a "blob" in the symbol table. + static int blob_count = 1; + char achBlob[32]; + sprintf(achBlob, "_DECLARATIVE_BLOB%d_", blob_count++); + + cbl_field_data_t data = { .memsize = capacity_cast(len), + .capacity = capacity_cast(len), + .initial = reinterpret_cast(blob), + .picture = reinterpret_cast(blob) }; + cbl_field_t field = { 0, FldBlob, FldInvalid, constant_e, + 0, 0, 0, cbl_occurs_t(), 0, "", + 0, {}, data, NULL }; + strcpy(field.name, achBlob); + + auto e = symbol_field_add(program, &field); + parser_symbol_add(cbl_field_of(e)); + return symbol_index(e); +} + +/* + * Generate the code to evaluate declaratives. This is the "secret + * section" right after END DECLARATIVES. Its name is + * _DECLARATIVES_EVAL, and it is performed after every statement that + * could raise an exception. + * + * The code calls the library routine __gg__match_exception, which + * compares the raised exception to the criteria set by the USE + * statements in the DECLARATIVES super-section. It returns an + * integer, which is an index to the label in the symbol table that + * defines the section for the matching USE criteria. + * + * The generated code is a sequence of IF statements comparing the + * returned integer to that of each declarative. If equal, that + * section is PERFORMed, and control branches to the end of this + * section, and thence back to the statement it came from. + */ +#include "io.h" +size_t current_file_index(); +file_status_t current_file_handled_status(); + +void +declarative_runtime_match( cbl_field_t *declaratives, cbl_label_t *lave ) { + if( getenv("SHOW_PARSE") ) + { + fprintf(stderr, "( %d ) %s: \n", cobol_location().first_line, __func__); + } + if( getenv("TRACE1") ) + { + gg_printf(">>>>>>( %d )(%s) declaratives:%s lave:%s\n", + build_int_cst_type(INT, cobol_location().first_line), + gg_string_literal(__func__), + gg_string_literal(declaratives->name), + gg_string_literal(lave->name), + NULL_TREE); + } + static auto yes = new_temporary(FldConditional); + static auto psection = new_temporary(FldNumericBin5); + + // Send blob, get declarative section index. + auto index = new_temporary(FldNumericBin5); + parser_match_exception(index, declaratives); + + auto p = declaratives->data.initial; + const auto dcls = reinterpret_cast(p); + size_t ndcl = dcls[0].section; // overloaded + + // Compare returned index to each section index. + for( auto p = dcls + 1; p < dcls + 1 + ndcl; p++ ) { + parser_set_numeric( psection, p->section ); + parser_relop( yes, index, eq_op, psection ); + parser_if( yes ); + auto section = cbl_label_of(symbol_at(p->section)); + parser_perform(section); + parser_label_goto(lave); + parser_else(); + parser_fi(); + } + + parser_label_label(lave); + + // A performed declarative may clear the raised exception with RESUME. + // If not cleared and fatal, the default handler will exit. + parser_check_fatal_exception(); +} + +ec_type_t +ec_type_of( const cbl_name_t name ) { + auto p = std::find_if( __gg__exception_table, __gg__exception_table_end, + [name]( const ec_descr_t& descr ) { + return 0 == strcasecmp(name, descr.name); + } ); + return p == __gg__exception_table_end? ec_none_e : p->type; +} + diff --git a/gcc/cobol/exceptg.h b/gcc/cobol/exceptg.h new file mode 100644 index 00000000000..4500c0f38d2 --- /dev/null +++ b/gcc/cobol/exceptg.h @@ -0,0 +1,61 @@ + /* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _EXCEPTL_H_ +#define _EXCEPTL_H_ + +/* This file contains exception processing declarations needed by the gcc/cobol + compilation. It's not included in the libgcobol compilation. */ + +extern const char * ec_type_str( ec_type_t type ); +extern ec_disposition_t ec_type_disposition( ec_type_t type ); + +extern void declarative_runtime_match(cbl_field_t *declaratives, + cbl_label_t *lave ); + +static inline ec_disposition_t +ec_implemented( ec_disposition_t disposition ) { + return ec_disposition_t( size_t(disposition) & ~0x80 ); +} + + +// >>TURN arguments +struct cbl_exception_files_t { + ec_type_t type; + size_t nfile; + size_t *files; + bool operator<( const cbl_exception_files_t& that ) { + return type < that.type; + } +}; + +size_t symbol_declaratives_add( size_t program, + const std::list& dcls ); + +#endif diff --git a/gcc/cobol/gcobc b/gcc/cobol/gcobc new file mode 100755 index 00000000000..93e1bd302a6 --- /dev/null +++ b/gcc/cobol/gcobc @@ -0,0 +1,465 @@ +#! /bin/sh -e + +# +# COPYRIGHT +# The gcobc program is in public domain. +# If it breaks then you get to keep both pieces. +# +# This file emulates the GnuCOBOL cobc compiler to a limited degree. +# For options that can be "mapped" (see migration-guide.1), it accepts +# cobc options, changing them to the gcobol equivalents. Options not +# recognized by the script are passed verbatim to gcobol, which will +# reject them unless of course they are gcobol options. +# +# User-defined variables, and their defaults: +# +# Variable Default Effect +# echo none If defined, echo the gcobol command +# gcobcx none Produce verbose messages +# gcobol ./gcobol Name of the gcobol binary +# GCOBCUDF PREFIX/share/cobol/udf/ Location of UDFs to be prepended to input +# +# By default, this script includes all files in $GCOBCUDF. To defeat +# that behavior, use GCOBCUDF=none. +# +# A list of supported options is produced with "gcobc -HELP". +# +## Maintainer note. In modifying this file, the following may make +## your life easier: +## +## - To force the script to exit, either set exit_status to 1, or call +## the error function. +## - As handled options are added, add them to the HELP here-doc. +## - The compiler can produce only one kind of output. In this +## script, that's known by $mode. Options that affect the type of +## output set the mode variable. Everything else is appended to the +## opts variable. +## + +if [ "$COBCPY" ] +then + copydir="-I$COBCPY" +fi + +if [ "$COB_COPY_DIR" ] +then + copydir="-I$COB_COPY_DIR" +fi + +# TODO: this file likely needs to query gcobol for its shared path instead +udf_default="${0%/*}/../share/gcobol/udf" +if [ ! -d "$udfdir" ] +then + # the one above is the installed one from the packages this one was previously used + udf_default="${0%/*}/../share/cobol/udf" +fi +udfdir="${GCOBCUDF:-$udf_default}" + +if [ -d "$udfdir" ] +then + for F in "$udfdir"/* + do + if [ -f "$F" ] + then + includes="$includes -include $F " + fi + done +else + if [ "${GCOBCUDF:-none}" != "none" ] + then + echo warning: no such directory: "'$GCOBCUDF'" + fi +fi + +exit_status=0 +skip_arg= +opts="$copydir ${dialect:--dialect mf} $includes" +mode=-shared + +incomparable="has no comparable gcobol option" + +if [ "${gcobcx:-0}" -gt 1 ] +then + set -x +fi + +error() { + echo "error: $1" >&2 + exit_status=1 +} +warn() { + echo "warning: $1 ignored" >&2 +} +ignore_arg() { + warn "$1" + skip_arg="$1" +} +no_warn() { :; } # silence is golden + +help() { + cat<>IF ) +to have the value of +.Ar expr . +.It Fl E +Write the CDF-processed \*[lang] input to standard output in free-form +reference format. Certain non-\*[lang] markers are included in the +output to indicate where copybook files were included. For +line-number consistency with the input, blank lines are retained. +.Pp +Unlike the C compiler, This option does not prevent compilation. +To prevent compilation, use the option +.D1 Fl Sy fsyntax-only +also. +.It Fl fdefaultbyte Ns Li = Ns Ar value +Use +.Ar value , +a number between 0 and 255, as the default value for all +WORKING-STORAGE data items that have no VALUE clause. By default, +alphanumeric data items are initialized with blanks, and numeric data +items are initialized to zero. This option overrides the default with +.Ar value . +.It Fl fsyntax-only +Invoke only the parser. Check the code for syntax errors, but don't do +anything beyond that. +.It Fl copyext Ar ext +For the CDF directive +.D1 COPY Ar name +if +.Ar name +is unquoted, several varieties of +.Ar name +are tried, as described below under +.Xr Copybooks Ns . +The +.Fl copyext +option extends the names searched to include +.Ar ext . +If +.Ar ext +is all uppercase or all lowercase, both forms are tried, with preference given to the one supplied. If +.Ar ext +is mixed-case, only that version is tried. +For example, with +.D1 Fl copyext Ar .abc +given the CDF directive +.D1 COPY name +.Nm +will add to possible names searched +.Ql name.abc +and +.Ql name.ABC +in that order. +.It Fl ffixed-form +Use strict +.Em "Reference Format" +in reading the \*[lang] input: +72-character lines, with a 6-character sequence area, and an indicator +column. Data past column 72 are ignored. +.It Fl ffree-form +Force the \*[lang] input to be interpreted as +.Em "free format" . +Line breaks are insignificant, except that +.Ql * +at the start of a line acts as a comment marker. +Equivalent to +.Fl indicator-column Ar 0 Ns Li . +. +.It Fl findicator-column +describes the location of the Indicator Area in a \*[lang] file +in +.Em "Reference Format" , +where the first 6 columns \(em known as the +.Dq "Sequence Number Area" +\(em are ignored, and the 7th column \(em the Indicator +Area \(em may hold a character of significance to the compiler. +.Pp +Although +.Em "reference format" , +strictly speaking, ignores data after column 72, +with this option +.Nm +accepts long \*[lang] lines, sometimes known as +.Em "extended source format" . +Text past column 72 is treated as ordinary \*[lang] text. (Line +continuation remains in effect, however, +provided no text appears +.Em past +column 72.) +.Pp +There is no maximum line length. Regardless of source code format, +the entire program could appear on one line. +.Pp +By default, +.Nm +auto-detects the source code format by examining the +.Em "sequence number area" +of the first line of the first file: if those characters are all +digits or blanks, the file is assumed to be in +.Em "reference format" , +with the indicator area in column 7. +.Pp +. +.It Fl fcobol-exceptions Ar exception Op Ns , Ns Ar exception Ns ... +By default, no exception condition is enabled (including fatal ones), +and by the ISO standard exception conditions are enabled only via the +CDF +.Sy "TURN" +directive. This option enables one or more exception conditions by +default, as though +.Sy TURN +had appeared at the top of the first source code file. +This option may also appear more than once on the command line. +.Pp +The value of +.Ar exception +is a Level 1, 2, or 3 exception condition name, as described by +\*[isostd]. +.Ql EC-ALL +means enable all exceptions. +.Pp +The +.Fl fno-cobol-exceptions +form turns off +.Ar exception , +just as though +.D1 >>TURN Ar exception CHECKING OFF +had appeared. +.Pp +Not all exception conditions are implemented. Any that are not +produce a warning message. +. +.It Fl fmax-errors Ar nerror +.Ar nerror +represents the number of error messages produced. Without this option, +.Nm +attempts to recover from a syntax error by resuming compilation at the +next statement, continuing until end-of-file. With it, +.Nm +counts the messages as they're produced, and stops when +.Ar nerror +is reached. +.It Fl fstatic-call Ns , Fl fno-static-call +With +.Fl fno-static-call , +.Nm +never uses static linking for +.D1 Sy CALL Ar program +By default, or with +.Fl fstatic-call , +if +.Ar program +is an alphanumeric literal, +.Nm +uses static linkage, meaning the compiler produces an external symbol +.Ar program +for the linker to resolve. +(In the future, that will work with +.Sy CONSTANT +data items, too.) With static linkage, if +.Ar program +is not supplied by the source code module or another object file or library +at build time, the linker will produce an +.Dq "unresolved symbol" +error. With +.Fl fno-static-call , +.Nm +always uses dynamic linking. +.Pp +This option affects the +.Sy CALL +statement for literals only. If +.Ar program +is a non-constant data item, it is always resolved using dynamic +linking, with +.Xr dlsym 3 Ns Li , +because its value is determined at run time. +.It Fl dialect Ar dialect-name +By default, +.Nm +accepts \*[lang] syntax as defined by \*[isostd], with some +extensions for backward compatibility with COBOL-85. To make the +compiler more generally useful, some additional syntax is supported by +this option. +.Pp +The value of +.Ar dialect-name +may be +.Bl -tag -compact +.It ibm +to indicate IBM COBOL 6.3 syntax, specifically +.D1 STOP . +.It gnu +to indicate GnuCOBOL syntax +.It mf +to indicate MicroFocus syntax, specifically +.Sy LEVEL 78 +constants. +.El +.Pp +Only a few such non-standard constructs are accepted, and +.Nm +makes no claim to emulate other compilers. But to the extent that a +feature is popular but nonstandard, this option provides a way to +support it, or add it. +. +.It Fl include Ar filename +Process +.Ar filename +as if +.D1 COPY Dq Ar filename +appeared as the first line of +the primary source file. If +.Ar filename +is not an absolute path, the directory searched is the current working +directory, not the directory containing the main source file. The +name is used verbatim. No permutations are applied, and no +directories searched. +.Pp +If multiple +.Fl include +options are given, the files are included in +the order they appear on the command line. +. +.It Fl preprocess Ar preprocess-filter +After all CDF text-manipulation has been applied, and before the +prepared \*[lang] is sent to the +.Sy cobol1 +compiler, the input may be +further altered by one or more filters. In the tradition of +.Xr sed 1 , +each +.Ar preprocess-filter +reads from standard input and writes to standard output. +.Pp +To supply options to +.Ar preprocess-filter , +use a comma-separated string, similar to how linker options are supplied to +.Fl Sy Wl . +(Do not put any spaces after the commas, because the shell will treat it as an option separator.) +.Nm +replaces each comma with a space when +.Ar preprocess-filter +is invoked. For example, +.D1 Fl preprocess Li tee,output.cbl +invokes +.Xr tee 1 +with the output filename argument +.Pa output.cbl , +causing a copy of the input to be written to the file. +.Pp +.Nm +searches the current working directory and the PATH environment +variable directories for an executable file whose name matches +.Ar preprocess-filter . +The first one found is used. If none is found, an error is reported +and the compiler is not invoked. +.Pp +The +.Fl preprocess +option may appear more than once on the command line. Each +.Ar preprocess-filter +is applied in turn, in order of appearance. +.Pp +The +.Ar preprocess-filter +should return a zero exit status, indicating success. If it returns a +nonzero exit status, an error is reported and the compiler is not +invoked. +. +.It Fl fflex-debug Ns Li , Fl fyacc-debug +produce messages useful for compiler development. The +.Fl fflex-debug +option prints the tokenized input stream. The +.Fl fyacc-debug +option shows the shift and reduce actions taken by the parser. +.El +. +.Sh COMPILATION SCENARIOS +.D1 gcobol Ar xyz.cob +.D1 gcobol -main Ar xyz.cob +.D1 gcobol -main= Ns Ar xyz.cob Ar xyz.cob +These are equivalent. The +.Ar xyz.cob +code is compiled and a +.Fn main +function is +inserted that calls the first PROGRAM-ID in the +.Ar xyz.cob +source file. +.Pp +.D1 gcobol -nomain Ar xyz.cob Ar elsewhere.o +The +.Fl nomain +option prevents a +.Fn main +function from being generated by the gcobol compiler. +A +.Fn main +entry point must be present in the +.Ar elsewhere.o +module; without it the +linker will report a +.Dq "missing main" +error. +.Pp +.D1 gcobol Ar aaa.cob Ar bbb.cob Ar ccc.cob +.D1 gcobol -main Ar aaa.cob Ar bbb.cob Ar ccc.cob +The two commands are equivalent. The three source code modules are compiled and +linked together along with a generated +.Fn main +function that calls the first +PROGRAM-ID in the +.Ar aaa.cob +module. +.Pp +.D1 gcobol Ar aaa.cob Ar bbb.cob Fl main Ar ccc.cob +.D1 gcobol -main Ns = Ns Ar ccc.cob Ar aaa.cob Ar bbb.cob Ar ccc.cob +These two commands have the same result: An +.Ar a.out +executable is created that +starts executing at the first PROGRAM-ID in +.Ar ccc.cob . +.Pp +.D1 gcobol -main Ns = Ns Ar bbb.cob:b-entry Ar aaa.cob Ar bbb.cob Ar ccc.cob +An +.Ar a.out +executable is created that starts executing at the PROGRAM-ID +.Ar "b-entry" . +.Pp +.D1 gcobol -c Ar aaa.cob +.D1 gcobol -c -main Ar bbb.cob +.D1 gcobol -c Ar ccc.cob +.D1 gcobol Ar aaa.o Ar bbb.o Ar ccc.o +The first three commands each create a .o file. The +.Ar bbb.o +file will contain a +.Fn main +entry point that calls the first PROGRAM-ID in +.Ar bbb . +The fourth links the three .o files into an +.Ar a.out . +. +.Sh EBCDIC +The +.Fl finternal-ebcdic +option is useful when working with mainframe \*[lang] programs intended +for EBCDIC-encoded files. With this option, while the \*[lang] text +remains in ASCII, the character literals and field initial values +produce EBCDIC strings in the compiled binary, and any character data +read from a file are interpreted as EBCDIC data. The file data are +not +.Em converted ; +rather, the file is assumed to use EBCDIC representation. String +literals in the \*[lang] text +.Em are +converted, so that they can be compared meaningfully with data in the file. +.Pp +Only file data and character literals are affected. Data read from +and written to the environment, or taken from the command line, are +interpreted according the +.Xr locale 7 +in force during execution. The same is true of +.Sy ACCEPT +and +.Sy DISPLAY . +Names known to the operating system, such as file names and the names +of environment variables, are processed verbatim. +.Pp +At the present time, this is an all-or-nothing setting. Support for +.Sy USAGE +and +.Sy CODESET , +which would allow conversion between encodings, remains a future goal. +.Pp +See also +.Sx "Feature-set Variables" , +below. +. +.Sh REDEFINES ... USAGE POINTER +Per ISO, an item that +.Sy REDEFINES +another may not be larger than the item it redefines, unless that item +has LEVEL 01 and is not EXTERNAL. In +.Nm , +using +.Fl dialect Ar ibm , +this rule is relaxed for +.Sy REDEFINES +with +.Sy USAGE POINTER +whose redefined member is a 4-byte +.Sy USAGE COMP-5 +(usually +.Sy PIC S9(8) Ns ), +or vice-versa. +In that case, the redefined member is re-sized to be 8 bytes, to +accommodate the pointer. This feature allows pointer arithmetic on a +64-bit system with source code targeted at a 32-bit system. +.Pp +See also +.Sx "Feature-set Variables" , +below. +. +.Sh IMPLEMENTATION NOTES +.Nm +is a gcc compiler, and follows gcc conventions where applicable. +Sometimes those conventions (and user expectations) conflict with +common Mainframe practice. Unless required of the compiler by the ISO +specification, any such conflicts are resolved in favor of gcc. +.Ss Linking +Unlike, C, the \*[lang] +.Sy CALL +statement implies dynamic linking, because for +.D1 Sy CALL Ar program +.Ar program +can be a variable whose value is determined at runtime. +However, the parameter may also be compile-time constant, either an +alphanumeric literal, or a +.Sy CONSTANT +data item. +.Pp +.Nm +supports static linking where possible, unless defeated by +.Fl fno-static-call . +If the parameter value is known at compile time, the compiler produces +an external reference to be resolved by the linker. The referenced +program is normally supplied via an object module, a static library, +or a shared object. If it is not supplied, the linker will report an +.Dq "unresolved symbol" +error, either at build time or, if using a shared object, when the +program is executed. This feature informs the programmer of the error +at the earliest opportunity. +.Pp +Programs that are expected to execute +correctly in the presence of an unresolved symbol (perhaps because the +program logic won't require that particular +.Sy CALL ) +can use the +.Fl no-static-call +option. That forces all +.Sy CALL +statements to be resolved dynamically, at runtime. +.ig +Programs that are expected to execute +correctly in the presence of an unresolved symbol (perhaps because the +program logic won't require that particular +.Sy CALL ) +can use linker options to produce an executable anyway. +.Pp +One corner case yet remains. The +.Sy CALL +statement includes an +.Sy "ON ERROR" +clause whose purpose is to handle errors arising when the called program is not found. +Control is transferred to the +.Sy "ON ERROR" +clause when the +.Sy EC-PROGRAM-NOT-FOUND +exception condition is raised. That exception condition is not raised in +.Nm +when: +.Bl -bullet -compact +.It +the +.Sy CALL +parameter +is known at compile time, i.e., is an alphanumeric literal or +.Sy CONSTANT +data item, and +.It +the executable was generated with the linker option to ignore unresolved symbols. +.El +In that case, the program is terminated with a signal. No recovery with +.Sy "ON ERROR" +is possible. +.Pp +Should your program meet those particular conditions, all is not lost. +There are workarounds, and an option could be added to use dynamic +linking for all +.Sy CALL +statement, regardless of compile-time constants. +.. +. +.Ss Implemented Exception Conditions +Not all Exception Conditions are implemented. Any attempt to enable +an EC that that is not implemented produces a warning message. +The following are implemented: +.Pp +.Bl -tag -offset 5n -compact +.It EC-FUNCTION-ARGUMENT +for the following functions: +.Bl -item -compact +.It +ACOS +.It +ANNUITY +.It +ASIN +.It +LOG +.It +LOG10 +.It +PRESENT-VALUE +.It +SQRT +.El +.It EC-SORT-MERGE-FILE-OPEN +.It EC-BOUND-SUBSCRIPT +subscript not an integer, less than 1, or greater than occurs +.It EC-BOUND-REF-MOD +refmod start not an integer, start less than 1, start greater than +variable size, length not an integer, length less than 1, and +start+length exceeds variable size +.It EC-BOUND-ODO +DEPENDING not an integer, greater than occurs upper limit, +less than occurs lower limit, and subscript greater than DEPENDING for sending item +.It EC-SIZE-ZERO-DIVIDE +for both fixed-point and floating-point division +.It EC-SIZE-TRUNCATION +.It EC-SIZE-EXPONENTIATION +.El +.Pp +As of this writing, no \*[lang] compiler documents a complete +implementation of \*[isostd] Exception Conditions. +.Nm +will give priority to those ECs that the user community deems most +valuable. +. +.Sh EXTENSIONS TO ISO \*[lang] +Standard \*[lang] has no provision for environment variables as defined +by Unix and Windows, or command-line arguments. +.Nm +supports them using syntax similar to that of GnuCOBOL. ISO and IBM +also define incompatible ways to return the program's exit status to +the operating system. +.Nm +supports IBM syntax. +. +.Ss Environment Variables +To read an environment variable: +.Pp +.D1 ACCEPT Ar target Li FROM ENVIRONMENT Ar envar +.Pp +where +.Ar target +is a data item defined in +.Sy "DATA DIVISION" , +and +.Ar envar +names an environment variable. +.Ar envar +may be a string literal or alphanumeric data item whose value is the +name of an environment variable. The value of the named environment +variable is moved to +.Ar target . +The rules are the same as for +.Sy MOVE . +.Pp +To write an environment variable: +.Pp +.D1 SET ENVIRONMENT Ar envar Li TO Ar source +.Pp +where +.Ar source +is a data item defined in +.Sy DATA DIVISION , +and +.Ar envar +names an environment variable. +.Ar envar +again may be a string literal or alphanumeric data item whose value is the +name of an environment variable. The value of the named environment +variable is set to the value of +.Ar source . +. +.Ss Command-line Arguments +To read command-line arguments, use the registers +.Sy COMMAND-LINE +and +.Sy COMMAND-LINE-COUNT +in an +.Sy ACCEPT +statement (only). +Used without a subscript, +.Sy COMMAND-LINE +returns the whole command line as a single string. With a subscript, +.Sy COMMAND-LINE +is a table of command-line arguments. For example, if the +program is invoked as +.sp +.D1 Sy ./program Fl i Ar input Ar output +.sp +then +.sp +.D1 ACCEPT target FROM COMMAND-LINE(3) +.sp +moves +.Ar input +into +.Ar target . +The program name is the first thing in the whole command line and is +found in COMMAND-LINE(1) +.Sy COMMAND-LINE +table. +.Pp +To discover how many arguments were provided on the command line, use +.sp +.D1 ACCEPT Ar target Li FROM COMMAND-LINE-COUNT +.sp +If +.Sy ACCEPT +refers to a nonexistent environment variable or command-line +argument, the target is set to +.Sy LOW-VALUES . +.Pp +The system command line parameters can also be accessed through the LINKAGE +SECTION in the program where execution starts. The data structure looks like +this: +.Bd -literal + linkage section. + 01 argc pic 999. + 01 argv. + 02 argv-table occurs 1 to 100 times depending on argc. + 03 argv-element pointer. + 01 argv-string pic x(100) . +.Ed +and the code to access the third parameter looks like this +.Bd -literal + procedure division using by value argc by reference argv. + set address of argv-string to argv-element(3) + display argv-string +.Ed +. +.Ss #line directive +The parser accepts lines in the form +.D1 #line Ar lineno Dq Ar filename Ns . +The effect is to set the current line number to +.Ar lineno +and the current input filename to +.Ar filename . +Preprocessors may use this directive to control the filename and line +numbers reported in error messages and in the debugger. +. +.Ss SELECT ... ASSIGN TO +In the phrase +.sp +.D1 ASSIGN TO Ar filename +.sp +.Ar filename +may appear in quotes or not. If quoted, it represents a filename as +known to the operating system. If unquoted, it names either a data +element or an environment variable containing the name of a file. +If +.Ar filename +matches the name of a data element, that element is used. If not, +resolution of +.Ar filename +is deferred until runtime, when the name must appear in the program's +environment. +. +.Sh ISO \*[lang] Implementation Status +.Ss USAGE Data Types +.Nm +supports the following +.Sy USAGE IS +clauses: +.Bl -tag -compact -width POINTER\0 +.It Sy INDEX +for use as an index in a table. +.It Sy POINTER +for variables whose value is the address of an external function, +.Sy PROGRAM-ID , +or data item. Assignment is via the +.Sy SET +statement. +.It Sy BINARY, Sy COMP , Sy COMPUTATIONAL, Sy COMP-4, Sy COMPUTATIONAL-4 +big-endian integer, 1 to 16 bytes, per PICTURE. +.It Sy COMP-1 , Sy COMPUTATIONAL-1 , Sy FLOAT-BINARY-32 +IEEE 754 single-precision (4-byte) floating point, as provided by the +hardware. +.It Sy COMP-2 , Sy COMPUTATIONAL-2 , Sy FLOAT-BINARY-64 +IEEE 754 double-precision (8-byte) floating point, as provided by the +hardware. +.It Sy COMP-3 , Sy COMPUTATIONAL-3, Sy PACKED-DECIMAL +currently unimplemented. +.It Sy COMP-5 , Sy COMPUTATIONAL-5 +little-endian integer, 1 to 16 bytes, per +.Sy PICTURE. +.It Sy FLOAT-BINARY-128 , FLOAT-EXTENDED +implements 128-bit floating point, per IEEE 754. +.El +.Pp +.Nm +supports ISO integer +.Sy BINARY- +types, most of which alias +.Sy COMP-5. +. +.hw unsigned +.sp +.TS +LB LB LB LB +LB LB LB LB +L L L L . +COMP-5 Compatible +Picture BINARY Type Bytes Value + T{ +BINARY-CHAR [UNSIGNED] +T} 1 0 \(em 256 +S9(1...4) T{ +BINARY-CHAR SIGNED +T} 1 -128 \(em +127 +\09(1...4) T{ +BINARY-SHORT [UNSIGNED] +T} 2 0 \(em 65535 +S9(1...4) T{ +BINARY-SHORT SIGNED +T} 2 -32768 \(em +32767 +\09(5...9) T{ +BINARY-LONG [UNSIGNED] +T} 4 0 \(em 4,294,967,295 +S9(5...9) T{ +BINARY-LONG SIGNED +T} 4 T{ +-2,147,483,648 \(em +2,147,483,647 +T} +\09(10...18) T{ +BINARY-LONG-LONG [UNSIGNED] +T} 8 T{ +0 \(em 18,446,744,073,709,551,615 +T} +S9(10...18) T{ +BINARY-LONG-LONG SIGNED +T} 8 T{ +-9,223,372,036,854,775,808 \(em +9,223,372,036,854,775,807 +T} +.TE +.Pp +These define a size (in bytes) and cannot be +used with a +.Sy PICTURE +clause. +Per the ISO standard, +.Sy SIGNED +is the default for the +.Sy "BINARY-" Ns Ar type +aliases. +.Pp +All computation \(em both integer and floating point \(em is done +using 128-bit intermediate forms. +. +.Ss Environment Names +In +.Nm +.sp +.Dl DISPLAY UPON +.sp +maps +.Sy SYSOUT +and +.Sy STDOUT +to standard output, and +.Sy SYSPUNCH , +.Sy SYSPCH +and +.Sy STDERR +to standard error. +. +.Ss Exit Status +.Nm +supports the ISO syntax for returning an exit status to the operating system, +.Pp +.D1 STOP RUN Oo WITH Oc Bro NORMAL | ERROR Brc Oo STATUS Oc Ar status +.Pp +In addition, +.Nm +also supports the IBM syntax for returning an exit status to +the operating system. Use the +.Sy RETURN-CODE +register: +.Bd -literal -offset indent +MOVE ZERO TO RETURN-CODE. +GOBACK. +.Ed +.Pp +The +.Sy RETURN-CODE +register is defined as a 4-byte binary integer. +.ig +.Pp +The ISO standard supports an extended form of +.Sy GOBACK : +.Pp +.D1 GOBACK {ERROR | NORMAL} WITH Ar status +.Pp +where +.Ar status +is a numeric data item or literal. This syntax has the same effect as: +.Bd -literal -offset indent +MOVE status TO RETURN-CODE. +GOBACK. +.Ed +The use of +.Sy ERROR +or +.Sy NORMAL +has no effect; the two are interchangeable. +.. +. +.Ss Compiler-Directing Facility (CDF) +The CDF should be used with caution because no comprehensive test +suite has been identified. +. +.Ss Conditional Compilation +.Bl -tag -width >>DEFINE +.It >> Ns Sy DEFINE Ar name Sy AS Bro Ar expression Li | Sy PARAMETER Brc Op Sy OVERRIDE +Define +.Ar name +as a compilation variable to have the value +.Ar expression . +If +.Ar name +was previously defined, +.Sy OVERRIDE +is required, else the directive is invalid. +.Sy AS PARAMETER +is accepted, but has no effect in +.Nm . +. +.It >> Ns Sy DEFINE Ar name AS Sy OFF +releases the definition +.Ar name , +making it subsequently invalid for use. +.\" ISO requires AS; cdf.y does not. +. +.It >> Ns Sy IF Ar cce Ar text Oo >> Ns Sy ELSE Ar alt-text Oc Li >> Ns Sy END-IF +evaluates +.Ar cce , +a +.Em "constant conditional expression\/" , +for conditional compilation. +If a name, +.Ar cce +may be defined with the +.Fl D +command-line parameter. If true, the \*[lang] text +.Ar text +is compiled. If false, +.Ar else-text , +if present, is compiled. +.Bo Sy IS Bo Sy NOT Bc Bc Sy DEFINED +is supported. Boolean literals are not supported. +. +.It >> Ns Sy EVALUATE +Not implemented. +.El +. +.Ss Other CDF Directives +.Bl -tag -width >>PROPAGATE +.It >> Ns Sy CALL-CONVENTION Ar convention +.Ar convention +may be one of: +.Bl -tag -compact +.It Sy \*[lang] +Use standard \*[lang] case-insensitive symbol-name matching. For +.Sy CALL Dq Ar name , +.Ar name +is rendered by the compiler in lowercase. +.It Sy C +Use case-sensitive symbol-name matching. The +.Sy CALL +target is not changed in any way; it is used verbatim. +.It Sy VERBATIM +An alias for >>\c +.Sy "CALL-CONVENTION C" . +.El +.It >> Ns Sy COBOL-WORDS EQUATE Ar keyword Sy WITH Ar alias +makes +.Ar alias +a synonym for +.Ar keyword . +.It >> Ns Sy COBOL-WORDS UNDEFINE Ar keyword +.Ar keyword +is removed from the \*[lang] grammar. Use of it in a program will provoke +a syntax error from the compiler. +.It >> Ns Sy COBOL-WORDS SUBSTITUTE Ar keyword Sy BY Ar new-word +.Ar keyword +is deleted as a keyword from the grammar, replaced by +.Ar new-word . +.Ar keyword +may thereafter be used as a user-defined word. +.It >> Ns Sy COBOL-WORDS RESERVE Ar new-word +Treat +.Ar new-word +as a \*[lang] keyword. It cannot be used by the program, either as a +keyword or as a user-defined word. +. +.It >> Ns Sy DISPLAY Ar string ... +Write +.Ar string +to standard error as a warning message. +.It >> Ns Sy SOURCE Ar format +.Ar format +may be one of: +.Bl -tag -compact +.It Sy FIXED +Source conforms to \*[lang] Reference Format with unlimited line length. +.It Sy FREE +Line endings and indentation are ignored by the compiler, except that a +.Ql "*" +at the beginning of a line is recognized as a comment. +.El +.El +.Pp +.Bl -tag -width >>PROPAGATE -compact +.It >> Ns Sy FLAG-02 +Not implemented. +.It >> Ns Sy FLAG-85 +Not implemented. +.It >> Ns Sy FLAG-NATIVE-ARITHMETIC +Not implemented. +.It >> Ns Sy LEAP-SECOND +Not implemented. +.It >> Ns Sy LISTING +Not implemented. +.It >> Ns Sy PAGE +Not implemented. +.It >> Ns Sy PROPAGATE +Not implemented. +.It >> Ns Sy TURN Oo +.Ar ec Oo Ar file Li ... Oc ... +.Oc Sy CHECKING Bro Oo Sy ON Oc Oo Oo Sy WITH Oc Sy LOCATION Oc | Sy OFF Brc +Enable (or, with +.Sy OFF , +disable) exception condition +.Ar ec +optionally associated with the file connectors +.Ar file . +If +.Sy LOCATION +is specified, +.Nm +reports at runtime the source filename and line number of the +statement that triggered the exception condition. +.El +. +.Ss Feature-set Variables +Some command-line options affect CDF +.Em "feature-set" +variables that are special to +.Nm . +They can be set and tested using +.Sy >>DEFINE +and +.Sy >>IF , +and are distinguished by a leading +.Ql \&% +in the name, which is otherwise invalid in a \*[lang] identifier: +.Pp +.Bl -tag -compact +.It Sy %EBCDIC-MODE +is set by +.Fl finternal-ebcdic . +.It Sy %64-BIT-POINTER +is implied by +.Fl "dialect ibm" . +.El +.Pp +To set a feature-set variable, use +.Dl >>SET Ar feature Li [AS] {ON | OFF} +If +.Ar feature +is +.Sy %EBCDIC-MODE , +the directive must appear before +.Sy PROGRAM-ID . +.Pp +To test a feature-set variable, use +.Dl >>IF Ar feature Li DEFINED +.. +.Ss Copybooks +.Nm +supports the CDF +.Sy COPY +statement, with or without its +.Sy REPLACING +component. For any statement +.sp +.D1 COPY Ar copybook +.sp +.Nm +looks first for an environment variable named +.Va copybook +and, if found, uses the contents of that variable as the name of the +copybook file. If that file does not exist, it continues looking for +a file named one of: +.sp +.Bl -bullet -compact -offset 5n +.It +.Pa copybook +(literally) +.It +.Pa copybook.cpy +.It +.Pa copybook.CPY +.It +.Pa copybook.cbl +.It +.Pa copybook.CBL +.It +.Pa copybook.cob +.It +.Pa copybook.COB +.El +.sp +in that order. It looks first in the same directory as the source +code file, and then in any +.Ar copybook-path +named with the +.Fl I +option. +. +.\" FIXME: need escape mechanism for directories with ':' in the name. +.Ar copybook-path +may (like the shell's +.Ev PATH +variable) be a colon-separated list. +. +The +.Fl I +option may occur multiple times on the command line. Each successive +.Ar copybook-path +is concatenated to previous ones. +Relative paths (having no leading +.Ql / Ns +\&) +are searched relative to the compiler's current working directory. +.Pp +For example, +.D1 \& +.D1 Fl I Li /usr/local/include:include +.D1 \& +searches first the directory where the \*[lang] program is found, next in +.Pa /usr/local/include , +and finally in an +.Pa include +subdirectory of the directory from which +.Nm +was invoked. +. +.Ss Intrinsic functions +.Nm +implements all intrinsic functions defined by \*[isostd], plus a few +others. They are listed alphabetically below. +.Bl -item -compact +.It +ABS ACOS ANNUITY ASIN ATAN +.It +BASECONVERT BIT_OF BIT_TO_CHAR BOOLEAN_OF_INTEGER BYTE_LENGTH +.It +CHAR CHAR_NATIONAL COMBINED_DATETIME CONCAT CONVERT COS CURRENT_DATE +.It +DATE_OF_INTEGER DATE_TO_YYYYMMDD DAY_OF_INTEGER DAY_TO_YYYYDDD DISPLAY_OF +.It +E EXCEPTION_FILE +EXCEPTION_FILE_N EXCEPTION_LOCATION EXCEPTION_LOCATION_N +EXCEPTION_STATEMENT EXCEPTION_STATUS EXP EXP10 +.It +FACTORIAL FIND_STRING +FORMATTED_CURRENT_DATE FORMATTED_DATE FORMATTED_DATETIME +FORMATTED_TIME FRACTION_PART +.It +HEX_OF HEX_TO_CHAR HIGHEST_ALGEBRAIC +.It +INTEGER INTEGER_OF_BOOLEAN INTEGER_OF_DATE INTEGER_OF_DAY +INTEGER_OF_FORMATTED_DATE INTEGER_PART +.It +LENGTH LOCALE_COMPARE +LOCALE_DATE LOCALE_TIME LOCALE_TIME_FROM_SECONDS LOG LOG10 LOWER_CASE +LOWEST_ALGEBRAIC +.It +MAX MEAN MEDIAN MIDRANGE MIN MOD MODULE_NAME +.It +NATIONAL_OF NUMVAL NUMVAL_C NUMVAL_F ORD +.It +ORD_MAX ORD_MIN +.It +PI PRESENT_VALUE +.It +RANDOM RANGE REM REVERSE +.It +SECONDS_FROM_FORMATTED_TIME +SECONDS_PAST_MIDNIGHT SIGN SIN SMALLEST_ALGEBRAIC SQRT +STANDARD_COMPARE STANDARD_DEVIATION SUBSTITUTE SUM +.It +TAN TEST_DATE_YYYYMMDD TEST_DAY_YYYYDDD TEST_FORMATTED_DATETIME +TEST_NUMVAL TEST_NUMVAL_C TEST_NUMVAL_F TRIM +.It +ULENGTH UPOS UPPER_CASE +USUBSTR USUPPLEMENTARY UUID4 UVALID UWIDTH +.It +VARIANCE +.It +WHEN_COMPILED +.It +YEAR_TO_YYYY +.El +. +.Ss Binary floating point DISPLAY +How the DISPLAY presents binary floating point numbers depends on the value. +.Pp +When a value has six or fewer decimal digits to the left of the +decimal point, it is expressed as +.Em 123456.789... . +.Pp +When a value is less than 1 and has no more than three zeroes to the +right of the decimal point, it is expressed as +.Em 0.0001234... . +.Pp +Otherwise, exponential notation is used: +.Em 1.23456E+7 . +.Pp +In all cases, trailing zeroes on the right of the number are removed +from the displayed value. +.Pp +.Bl -tag -compact -width FLOAT-EXTENDED +.It COMP-1 +displayed with 9 decimal digits. +.It COMP-2 +displayed with 17 decimal digits. +.It FLOAT-EXTENDED +displayed with 36 decimal digits. +.El +.Pp +Those digit counts are consistent with the IEEE 754 requirements for +information interchange. As one example, the description for COMP-2 +binary64 values (per Wikipedia). +.Pp +If an IEEE 754 double-precision number is converted to a decimal +string with at least 17 significant digits, and then converted back to +double-precision representation, the final result must match the +original number. +.Pp +17 digits was chosen so that the +.Sy DISPLAY +statement shows the contents +of a COMP-2 variable without hiding any information. +. +.Ss Binary floating point MOVE +During a +.Sy MOVE +statement, a floating-point value may be truncated. It will not be +unusual for Numeric Display values to be altered when moved through a +floating-point value. +.Pp +This program: +.Bd -literal + 01 PICV999 PIC 9999V999. + 01 COMP2 COMP-2. + PROCEDURE DIVISION. + MOVE 1.001 to PICV999 + MOVE PICV999 TO COMP2 + DISPLAY "The result of MOVE " PICV999 " TO COMP2 is " COMP2 + MOVE COMP2 to PICV999 + DISPLAY "The result of MOVE COMP2 TO PICV999 is " PICV999 +.Ed +.Pp +generates this result: +.Bd -literal + The result of MOVE 0001.001 TO COMP2 is 1.00099999999999989 + The result of MOVE COMP2 TO PICV999 is 0001.000 +.Ed +.Pp +However, the internal implementation can produce results that might be seem surprising: +.Bd -literal + The result of MOVE 0055.110 TO COMP2 is 55.1099999999999994 + The result of MOVE COMP2 TO PICV999 is 0055.110 +.Ed +.Pp +The source of this inconsistency is the way +.Nm +stores and converts +numbers. Converting the floating-point value to the numeric display +value 0055110 is done by multiplying 55.109999...\& by 1,000 and then +truncating the result to an integer. And it turns out that even +though 55.11 can’t be represented in floating-point as an exact value, +the product of the multiplication, 55110, is an exact value. +.Pp +In cases where it is important for conversions to have predictable +results, we need to be able to apply rounding, which can be done with +an arithmetic statement: +.Bd -literal + MOVE 1.001 to PICV999 + MOVE PICV999 TO COMP2 + DISPLAY "The result of MOVE " PICV999 " TO COMP2 is " COMP2 + MOVE COMP2 to PICV999 + DISPLAY "The result of MOVE COMP2 TO PICV999 is " PICV999 + ADD COMP2 to ZERO GIVING PICV999 ROUNDED + DISPLAY "The result of ADD COMP2 to ZERO GIVING PICV999 ROUNDED is " PICV999 +.sp + The result of MOVE 0001.001 TO COMP2 is 1.00099999999999989 + The result of MOVE COMP2 TO PICV999 is 0001.000 + The result of ADD COMP2 to ZERO GIVING PICV999 ROUNDED is 0001.001 +.Ed +.Ss Binary floating point computation +.Nm +attempts to do internal computations using binary integers when +possible. Thus, simple arithmetic between binary values and numeric +display values conclude with binary intermediate results. +.Pp +If a floating-point value gets included in the mix of variables +specified for a calculation, then the intermediate result becomes a +128-bit floating-point value. +. +.Ss A warning about binary floating point comparison +The cardinal rule when doing comparisons involving floating-point +values: Never, ever, test for equality. It’s just not worth the hassle. +.Pp +For example: +.Bd -literal + WORKING-STORAGE SECTION. + 01 COMP1 COMP-1 VALUE 555.11. + 01 COMP2 COMP-2 VALUE 555.11. + PROCEDURE DIVISION. + DISPLAY "COMPARE " COMP1 " with " COMP2 + IF COMP1 EQUAL COMP2 DISPLAY "Equal" ELSE DISPLAY "Not equal" END-IF +.sp + MOVE COMP1 to COMP2 + DISPLAY "COMPARE " COMP1 " with " COMP2 + IF COMP1 EQUAL COMP2 DISPLAY "Equal" ELSE DISPLAY "Not equal" END-IF +.Ed +.Pp +the results: +.Bd -literal + COMPARE 555.1099854 with 555.110000000000014 + Not equal + COMPARE 555.1099854 with 555.1099853515625 + Equal +.Ed +.Pp +Why? Again, it has to do with the internals of +.Nm . +When differently sized floating-point values need to be compared, they +are first converted to 128-bit floats. And it turns out that when a +COMP1 is moved to a COMP2, and they are both converted to +FLOAT-EXTENDED, the two resulting values are (probably) equal. +.Pp +Avoid testing for equality unless you really know what you are doing +and you really test the code. And then avoid it anyway. +.Pp +Finally, it is observably the case that the +.Nm +implementations of floating-point conversions and comparisons don’t +precisely match the behavior of other \*[lang] compilers. +.Pp +You have been warned. +. +.Sh ENVIRONMENT +.Bl -tag -width COBPATH +.It Ev COBPATH +If defined, specifies the directory paths to be used by the +.Nm +runtime library, +.Pa libgcobol.so , +to locate shared objects. +Like +.Ev LD_LIBRARY_PATH , +it may contain several directory names separated by a colon +.Pq Ql \&: . +.Ev COBPATH +is searched first, followed by +.Ev LD_LIBRARY_PATH . +.Pp +Each directory is searched for files whose name ends in +.Ql ".so" . +For each such file, +.Xr dlopen 3 +is attempted, and, if successful +.Xr dlsym 3 . +No relationship is defined between the symbol's name and the filename. +.Pp +Without +.Ev COBPATH , +binaries produced by +.Nm +behave as one might expect of any program compiled with gcc. Any +shared objects needed by the program are mentioned on the command line +with a +.Fl l Ns Ar library +option, and are found by following the executable's +.Pa RPATH +or otherwise per the configuration of the runtime linker, +.Xr ld.so 8 . +. +.It Ev UPSI +\*[lang] defines a User Programmable Status Indicator (UPSI) switch. In +.Nm , +the settings are denoted +.Sy UPSI-0 +through +.Sy UPSI-7 , +where 0-7 indicates a bit position. The value of the UPSI switches is +taken from the +.Ev UPSI +environment variable, whose value is a string of up to eight 1's and +0's. The first character represents the value of +.Sy UPSI-0 , +and missing values are assigned 0. For example, +.Sy UPSI=1000011 +in the environment sets bits 0, 5, and 6 on, which means that +.Sy UPSI-0 , +.Sy UPSI-5 , +and +.Sy UPSI-6 +are on. +.It Ev GCOBOL_TEMPDIR +causes any temporary files created during CDF processing to be written +to a file whose name is specified in the value of +.Ev GCOBOL_TEMPDIR . +If the value is just +.Dq / , +the effect is different: each copybook read is reported on standard +error. This feature is meant to help diagnose mysterious copybook +errors. +.El +. +.Sh FILES +Executables produced by +.Nm +require the runtime support library +.Pa libgcobol , +which is provided both as a static library and as a shared object. +. +.\" .Sh DIAGNOSTICS +. +.Sh COMPATIBILITY +The ISO standard leaves the default file organization up to the implementation; in +.Nm , +the default is +.Sy "SEQUENTIAL" . +. +.Ss On-Disk Format +Any ability to use files produced by other \*[lang] compilers, or for +those compilers to use files produced by +.Nm , +is the product of luck and intuition. Various compilers interpret the +ISO standard differently, and the standard's text is +not always definitive. +.Pp +For +.Sy "ORGANIZATION IS LINE SEQUENTIAL" +files (explicitly or by default), +.Nm , +absent specific direction, produces an ordinary Linux text file: for +each WRITE, the data are written, followed by an ASCII NL (hex 0A) +character. On READ, the record is read up to the size of the +specified record or NL, whichever comes first. The NL is not included +in the data brought into the record buffer; it serves only as an +on-disk record-termination marker. Consequently, +.Sy SEQUENTIAL +and +.Sy "LINE SEQUENTIAL" +files work the same way: the \*[lang] program never sees the record +terminator. +.Pp +When +.Sy READ +and +.Sy WRITE +are used with +.Sy ADVANCING , +however, the game changes. If +.Sy ADVANCING +is used with +.Sy "LINE SEQUENTIAL" +files, +it is honored by +.Nm . +.Pp +Other compilers may not do likewise. +According to ISO, in +.Sy WRITE +(14.9.47.3 General rules) +.Sy ADVANCING +is +.Em ignored +for files for which +.Dq "the physical file does not support vertical positioning" . +It further states that, in the absence of +.Sy ADVANCING , +.Sy WRITE +proceeds as if +.Dq "as if the user has specified AFTER ADVANCING 1 LINE" . +Some other implementations interpret that to mean that the first +.Sy WRITE +to a +.Sy "LINE SEQUENTIAL" +file results in a leading NL on the first line, and no trailing NL on +the last line. Some furthermore +.Em prohibit +the use of +.Sy ADVANCING +with +.Sy "LINE SEQUENTIAL" +files. +. +.\" .Sh SEE ALSO +. +.Sh STANDARDS +The reference standard for +.Nm +is \*[isostd]. +.Bl -bullet -compact +.It +If +.Nm +compiles code consistent with that standard, the resulting program +should execute correctly; any other result is a bug. +.It +If +.Nm +compiles code that does not comply with that standard, but runs correctly according to some other specification, that represents a non-standard extension. One day, the +.Fl pedantic +option will produce diagnostic messages for such code. +.It +If +.Nm +rejects code consistent with that standard, that represents an aspect +of \*[lang] that is (or is not) on the To Do list. If you would like +to see it compile, please get in touch with the developers. +.El +. +.Ss Status of NIST \*[lang] Compiler Verification Suite +.Bl -tag -compact -width "\0\0100% NC" +.It NC 100% +Nucleus +.It SQ 100% +Sequential I/O +.It RL 100% +Relative I/O +.It IX 100% +Indexed I/O +.It IC 100% +Inter-Program Communication +.It ST 100% +Sort-Merge +.It SM 100% +Source Text Manipulation RW \en Report Writer +.It CM +Communication +.It DB to do? +Debug +.It SG +Segmentation +.It IF 100% +Intrinsic Function +.El +.Pp +Where +.Nm +passes 100% of the tests in a module, we exclude the (few) tests for +obsolete features. The authors regard features that were obsolete in +1985 to be well and truly obsolete today, and did not implement them. +. +.Ss Notable deferred features +CCVS-85 modules not marked with above with any status (CM, and SG) are on the +.Dq "hard maybe" +list, meaning they await an interested party with real code using the feature. +.Pp +.Nm +does not implement Report Writer or Screen Section. +. +.Ss Beyond COBOL/85 +.Nm +increasingly implements \*[isostd]. For example, +.Sy DECLARATIVES +is not tested by CCVS-85, but are implemented by +.Nm Ns . +Similarly, Exception Conditions were not defined in 1985, and +.Nm +contains a growing number of them. +.Pp +The authors are well aware that a complete, pure \*[lang]-85 compiler +won't compile most existing \*[lang] code. Every vendor offered (and +offers) extensions, and most environments rely on a variety of +preprocessors and ancillary systems defined outside the standard. The +express goal of adding an ISO \*[lang] front-end to GCC is to establish a +foundation on which any needed extensions can be built. +. +.Sh HISTORY +\*[lang], the language, may well be older than the reader. To the +author's knowledge, free \*[lang] compilers first began to appear in 2000. +Around that time an earlier \*[lang] for GCC project +.br +.Lk https://cobolforgcc.sourceforge.net/ cobolforgcc +met with some success, but was never officially merged into GCC. +.Pp +This compiler, +.Nm , +was begun by +.Lk https://www.cobolworx.com/ COBOLworx +in the fall of 2021. The +project announced a complete implementation of the core language +features in December 2022. +. +.Sh AUTHORS +.Bl -tag -compact +.It "James K. Lowden" +(jklowden@cobolworx.com) is responsible for the parser. +.It "Robert Dubner" +(rdubner@cobolworx.com) is responsible for producing the GIMPLE tree, +which is input to the GCC back-end. +.El +. +.Sh CAVEATS +.Bl -bullet -compact +.It +.Nm +has been tested only on x64 and Apple M1 processors running Linux in +64-bit mode. +.It +The I/O support has not been extensively tested, and does not +implement or emulate many features related to VSAM and other mainframe +subsystems. While LINE-SEQUENTIAL files are ordinary text files that +can be manipulated with standard utilities, INDEXED and RELATIVE files +produced by +.Nm +are not compatible with that of any other \*[lang] compiler. Enhancements +to the I/O support will be readily available to the paying customer. +.El +. +.\" .Sh BUGS diff --git a/gcc/cobol/gcobol.3 b/gcc/cobol/gcobol.3 new file mode 100644 index 00000000000..adc141a7aad --- /dev/null +++ b/gcc/cobol/gcobol.3 @@ -0,0 +1,328 @@ +.ds lang COBOL +.ds gcobol GCC\ \*[lang]\ Front-end +.Dd \& March 2024 +.Dt GCOBOL 3\& "GCC \*[lang] Compiler" +.Os Linux +.Sh NAME +.Nm gcobol +.Nd \*[gcobol] I/O function API +.Sh LIBRARY +.Pa libgcobol +. +.Sh SYNOPSIS +.In symbols.h +.In io.h +.In gcobolio.h +. +.Ft gcobol_io_t Fn gcobol_fileops +.Bd -literal +class gcobol_io_t { +public: + static const char constexpr marquee[64]; + typedef void (open_t)( cblc_file_t *file, + char *filename, + int mode_char, + int is_quoted ); + typedef void (close_t)( cblc_file_t *file, + int how ); + typedef void (start_t)( cblc_file_t *file, + int relop, // needs enum + int first_last_key, + size_t length ); + typedef void (read_t)( cblc_file_t *file, + int where ); + typedef void (write_t)( cblc_file_t *file, + unsigned char *data, + size_t length, + int after, + int lines, + int is_random ); + typedef void (rewrite_t)( cblc_file_t *file, + size_t length, bool is_random ); + typedef void (delete_t)( cblc_file_t *file, + bool is_random ); + open_t *Open; + close_t *Close; + start_t *Start; + read_t *Read; + write_t *Write; + rewrite_t *Rewrite; + delete_t *Delete; +\0\0... +}; +.Ed +. +.Sh DESCRIPTION +.Nm +supplies replaceable I/O functionality via +.Fn gcobol_fileops . +It returns a pointer to a structure of C function pointers that +implement sequential, relative, and indexed file operations over files +whose On Disk Format (ODF) is defined by +.Nm . +A user wishing to use another library that implements the same +functionality over a different ODF must supply a different implementation of +.Fn gcobol_fileops , +plus 7 functions, as described in this document. The pointers to +those user-implemented functions are placed in a C++ object of type +.Vt gcobol_io_t +and an instantiation of that type is returned by +.Fn gcobol_fileops . +The compiled program initializes I/O operations by calling that +function the first time any file is opened. +.Pp +Each function takes as its first argument a pointer to a +.Vt cblc_file_t +object, which is analogous to a +.Vt FILE +object used in the C +.Sy stdio +functions. The +.Vt cblc_file_t +structure acts as a communication area between the compiled program +and the I/O library. Any information needed about the file is kept +there. Notably, the outcome of any operation is stored in that +structure in the +.Va file_status +member, not as a return code. Information about the +.Em operation +(as opposed to the +.Em file ) +appear as parameters to the function. +.Pp +.Vt cblc_file_t +has one member, not used by +.Nm , +that is reserved for the user: +.Dl Vt "void *" Pa implementation . +.Pp +User-supplied I/O functions may assign and dereference +.Pa implementation . +.Nm +will preserve the value, but never references it. +.Pp +The 7 function pointers in +.Vt gcobol_io_t +are +.Bl -hang -width Rewrite +.It Open +.Ft void +.Fn open_t "cblc_file_t *file" "char *filename" "int mode_char" "int is_quoted" +.br +parameters: +.Bl -tag -width mode_char -compact +.It Ar filename +is the filename, as known to the OS +.It Ar mode_char +is one of +.Bl -hang -width MM -compact +.It Sq r +OPEN INPUT: read-only mode +.It Sq w +OPEN OUTPUT: create a new file or overwrite an existing one +.It Sq a +EXTEND: append to sequential file +.It Sq + +modify existing file +.El +.It Ar is_quoted +If +.Sy true , +.Ar filename +is taken literally. If +.Sy false , +.Ar filename +is interpreted as the name of an environment variable, the contents of +which, if extant, are taken as the name of the file to be opened. If +no such variable exists, then +.Ar filename +is used verbatim. +.El +.It Close +.Ft void +.Fn close_t "cblc_file_t *file" "int how" +.br +parameters: +.Bl -hang -width how -compact +.It Ar how +A value of 0x08 closes a +.Dq REEL\ unit . +Because no such thing is supported, the function sets the file status to +.Dq 07 , +meaning +.Em "not a tape" . +.El +.It Start +.Ft void +.Fn start_t "cblc_file_t *file" "int relop" "int first_last_key" "size_t length" +.br +parameters: +.Bl -tag -width length -compact +.It Ar relop +is one of +.Bl -hang -width LT -compact +.It Li 0 +means +.Sq < +.It Li 1 +means +.Sq <= +.It Li 2 +means +.Sq = +.It Li 3 +means +.Sq != +.It Li 4 +means +.Sq >= +.It Li 5 +means +.Sq > +.El +.It Ar first_last_key +is the key number (starting at 1) of the key within the +.Vt cblc_file_t +structure. +.It Ar length +is the size of the key (TODO: per the START statement?) +.El +.It Read +.Ft void +.Fn read_t "cblc_file_t *file" "int where" +parameters: +.Bl -tag -width where -compact +.It Ar where +.Bl -hang -width 000 -compact +.It Li -2 +PREVIOUS +.It Li -1 +NEXT +.It Ar \0N +represents a key number, starting with 1, in the +.Vt cblc_file_t +structure. The value of that key is used to find the record, and read it. +.El +.El +.It Write +.Ft void +.Fn write_t "cblc_file_t *file" "unsigned char *data" \ +"size_t length" "int after" "int lines" "int is_random" +.br +parameters: +.Bl -hang -width is_random -compact +.It Ar data +address of in-memory buffer to write +.It Ar length +length of in-memory buffer to write +.It Ar after +has the value 1 if the +.D1 "AFTER ADVANCING n LINES" +phrase was present in the +.Sy WRITE +statement, else 0 +.It Ar lines +may be one of +.Bl -hang -width 00000 -compact +.It Li -666 +ADVANCING PAGE +.It Li \0\0-1 +no +.Sy ADVANCING +phrase appeared +.It \0\0\00 +ADVANCING 0 LINES +is valid +.It \0\0>0 +the value of +.Ar n +in +ADVANCING +.Ar n +LINES +.El +.It Ar is_random +is +.Sy true +if the +.Em "access mode" +is RANDOM +.El +.It Rewrite +.Ft void +.Fn rewrite_t "cblc_file_t *file" "size_t length" "bool is_random" +parameters: +.Bl -hang -width is_random -compact +.It Ar length +number of bytes to write +.It Ar is_random +.Sy true +if +.Em "access mode" +is RANDOM +.El +.It Delete +.Ft void +.Fn delete_t "cblc_file_t *file" "bool is_random" +parameters: +.Bl -hang -width is_random -compact +.It Ar is_random +.Sy true +if +.Em "access mode" +is RANDOM +.El +.El +. +.Pp +The library implements one function that the +.Nm Ns +-produced binary calls directly: +.Bl -item +.It +.Ft gcobol_io_t * +.Fn gcobol_fileops +.br +This function populates a +.Vt gcobol_io_t +object with the above function pointers. The compiled binary begins +by calling +.Fn gcobol_fileops Ns , +and then uses the supplied pointers to effect I/O. +.El +. +.\" The following commands should be uncommented and +.\" used where appropriate. +.\" .Sh IMPLEMENTATION NOTES +.\" This next command is for sections 2, 3, and 9 only +.\" (function return values). +.Sh RETURN VALUES +I/O functions return +.Sy void . +.Fn gcobol_fileops +returns +.Vt gcobol_io_t* . +.\" .Sh FILES +.\" .Sh COMPATIBILITY +.\" This next command is for sections 2, 3, 4, and 9 only +.\" (settings of the errno variable). +.\" .Sh ERRORS +.\" .Sh SEE ALSO +.Sh STANDARDS +The I/O library supplied by +.Nm , +.Sy libgcobolio.so , +supports the I/O semantics defined by ISO \*[lang]. +It is not intended to be compatible with any other ODF. That is, +.Sy libgcobolio.so +cannot be used to exchange data with any other \*[lang] implementation. +.Pp +The purpose of the +.Vt gcobol_io_t +structure is to allow the use of other I/O implementations with other ODF representations. +.\" .Sh HISTORY +.\" .Sh AUTHORS +.Sh CAVEATS +The library is not well tested, not least because it is not implemented. +.Sh BUGS +The future is yet to come. diff --git a/gcc/cobol/gcobolspec.cc b/gcc/cobol/gcobolspec.cc new file mode 100644 index 00000000000..20ca757fa87 --- /dev/null +++ b/gcc/cobol/gcobolspec.cc @@ -0,0 +1,694 @@ +/* Specific flags and argument handling of the Cobol front-end. + Copyright (C) 2021-2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This file implements gcobol's language-specific option handling for the COBOL front + end. It is based on a similar file for the Fortran front end, which + itself was derived from the C front end. Specifically, it defines + + lang_specific_driver(cl_decoded_option**, unsigned int*, int*) + + for gcobol. + + For GNU COBOL, we do the following to the argument list + before passing it to `gcc': + + 1. Make sure `-lgcobol -lm' is at the end of the list. + + 2. Make sure each time `-lgcobol' or `-lm' is seen, it forms + part of the series `-lgcobol -lm'. + + #1 and #2 are not done if `-nostdlib' or any option that disables + the linking phase is present, or if `-xfoo' is in effect. Note that + a lack of source files or -l options disables linking. + + The way this file builds the new argument list was rewritten to be easier to + maintain, and improve the way it decides to add or not add extra arguments, + etc. Several improvements were made in the handling of arguments, primarily + to make it more consistent with `gcc' itself. */ + +/* + * Number of extra output files that lang_specific_pre_link may generate. + * Unused. + */ + +#include "cobol-system.h" +#include "coretypes.h" +#include "opt-suggestions.h" +#include "gcc.h" +#include "opts.h" +#include "tm.h" +#include "intl.h" + +int lang_specific_extra_outfiles = 0; + +#ifndef MATH_LIBRARY +#define MATH_LIBRARY "m" +#endif + +#ifndef DL_LIBRARY +#define DL_LIBRARY "dl" +#endif + +#ifndef STDCPP_LIBRARY +#define STDCPP_LIBRARY "stdc++" +#endif + +#ifndef COBOL_LIBRARY +#define COBOL_LIBRARY "gcobol" +#endif + +/* The original argument list and related info is copied here. */ +static const struct cl_decoded_option *original_options; + +/* The new argument list will be built here. */ +static std::vectornew_opt; + +// #define NOISY 1 + +static void +append_arg(const struct cl_decoded_option arg) + { +#ifdef NOISY + static int counter = 1; + fprintf( stderr, + ">>>>>> #%2d Appending %4ld %s\n", + counter++, + arg.opt_index, + arg.orig_option_with_args_text); +#endif + + new_opt.push_back(arg); + } + +static void +append_option (size_t opt_index, const char *arg, int value) + { + /* Append an option described by OPT_INDEX, ARG and VALUE to the list + being built. */ + struct cl_decoded_option decoded; + generate_option(opt_index, arg, value, CL_DRIVER, &decoded); + append_arg(decoded); + } + +static void +add_arg_lib(const char *library, bool force_static ATTRIBUTE_UNUSED) + { + /* Append a libgcobol argument to the list being built. If + FORCE_STATIC, ensure the library is linked statically. */ +#ifdef HAVE_LD_STATIC_DYNAMIC + if( force_static ) + { + append_option (OPT_Wl_, LD_STATIC_OPTION, 1); + } + append_option (OPT_l, library, 1); +#endif +#ifdef HAVE_LD_STATIC_DYNAMIC + if( force_static ) + { + append_option (OPT_Wl_, LD_DYNAMIC_OPTION, 1); + } +#endif + } + +static void +append_rdynamic() + { + // This is a bit ham-handed, but I was in a hurry. + struct cl_decoded_option decoded = {}; + decoded.opt_index = OPT_rdynamic; + decoded.orig_option_with_args_text = "-rdynamic"; + decoded.canonical_option[0] = "-rdynamic"; + decoded.canonical_option_num_elements = 1; + decoded.value = 1; + append_arg(decoded); + return; + } + +static void +append_rpath() + { +#ifdef EXEC_LIB + // Handing append_option() something on the stack Just Doesn't Work + if( strlen(EXEC_LIB) ) + { + static char ach[256]; + snprintf(ach, sizeof(ach), "-rpath=%s", EXEC_LIB); + append_option (OPT_Wl_, ach, 1); + } +#endif + return; + } + +static void +append_allow_multiple_definition() + { + append_option (OPT_Wl_, "--allow-multiple-definition", 1); + return; + } + +static void +append_fpic() + { + // This is a bit ham-handed, but I was in a hurry. + struct cl_decoded_option decoded = {}; + decoded.opt_index = OPT_rdynamic; + decoded.orig_option_with_args_text = "-fPIC"; + decoded.canonical_option[0] = "-fPIC"; + decoded.canonical_option_num_elements = 1; + decoded.value = 1; + append_arg(decoded); + return; + } + +void +lang_specific_driver (struct cl_decoded_option **in_decoded_options, + unsigned int *in_decoded_options_count, + int *in_added_libraries ATTRIBUTE_UNUSED) + { + int argc = (int)*in_decoded_options_count; + struct cl_decoded_option *decoded_options = *in_decoded_options; + + // This is the language in effect; it is changed by the OPT_x option. + // Start it out with the default of "none", which is the same as "cobol". + const char *language = "none"; + + /* The number of input and output files in the incoming arg list. */ + int n_infiles = 0; + int n_outfiles = 0; + + // The number of input files when the language is "none" or "cobol" + int n_cobol_files = 0; + + // saw_OPT_no_main means "don't expect -main" + bool saw_OPT_no_main = false; + + // The number of incoming OPT_main and OPT_main_ options seen + int n_mains = 0; + + bool saw_OPT_c = false; + bool saw_OPT_shared = false; + bool saw_OPT_pic = false; + bool saw_OPT_PIC = false; + + bool verbose = false; + + // These flags indicate whether we need various libraries + + bool need_libgcobol = true; + bool need_libmath = (MATH_LIBRARY[0] != '\0'); + bool need_libdl = (DL_LIBRARY[0] != '\0'); + bool need_libstdc = (STDCPP_LIBRARY[0] != '\0'); + // bool need_libquadmath = (QUADMATH_LIBRARY[0] != '\0'); + bool need_rdynamic = true; + bool need_allow_multiple_definition = true; + + // Separate flags for a couple of static libraries + bool static_libgcobol = false; + bool static_in_general = false; + + /* WEIRDNESS ALERT: + + Sometime around August of 2024, changes were made to the GCC source code + that resulted in an "memory released twice" run-time error when a + std::unordered_map was destructed twice, which usually can't happen. But + it was happening in a gcobol-generated executable. Investigation revealed + that + + gocobol ... libgcobol.a -lgcobol + + resulted in __gg__alphabet_states being destructed twice. + + This should not happen! In normal -shared code, including both libxxx.a + and -lxxx is perfectly legitimate and causes no problem, because the first + one to be encountered provides the globals. But something about the + extremely complex makefile for libgcobol was resulting in the double + destructor problem. + + A couple of days of looking for a fix were unsuccessful. + + So, I have added logic to this module to prevent the otherwise automatic + insertion of "-lgcobol" when there is an explicit "libgcobol.a" in the + parameters. + + */ + + int index_libgcobol_a = 0; + + // This is for the -Wl,-rpath= + bool need_rpath = true; + + bool no_files_error = true; + +#ifdef NOISY + int counter=1; + for(int i = 0; i < argc; i++) + { + fprintf( stderr, + ">>>>>> #%2d Incoming: %4ld %s\n", + counter++, + decoded_options[i].opt_index, + decoded_options[i].orig_option_with_args_text); + } + fprintf (stderr, "\n"); +#endif + + // There is always the possibility that no changes to the options + // will be needed: + + /* First pass through arglist. + + If -nostdlib or a "turn-off-linking" option is anywhere in the + command line, don't do any library-option processing (except + relating to -x). */ + + for(int i = 1; i < argc; ++i) + { + if (decoded_options[i].errors & CL_ERR_MISSING_ARG) + { + continue; + } + + if( strcmp( decoded_options[i].orig_option_with_args_text, "-###") == 0 ) + { + no_files_error = false; + } + + switch(decoded_options[i].opt_index) + { + case OPT_SPECIAL_input_file: + no_files_error = false; + n_infiles += 1; + if( strcmp(language, "none") == 0 + || strcmp(language, "cobol") == 0 ) + { + n_cobol_files += 1; + } + if( strstr(decoded_options[i].orig_option_with_args_text, "libgcobol.a") ) + { + // We have been given an explicit libgcobol.a. We need to note that. + index_libgcobol_a = i; + } + continue; + + case OPT_shared: + saw_OPT_shared = true; + break; + + case OPT_fpic: + saw_OPT_pic = true; + break; + + case OPT_fPIC: + saw_OPT_PIC = true; + break; + + case OPT_c: + // With this option, no libraries need be loaded + saw_OPT_c = true; + need_libgcobol = false; + need_libmath = false; + need_libdl = false; + need_libstdc = false; + // need_libquadmath = false; + need_rdynamic = false; + break; + + case OPT_rdynamic: + need_rdynamic = false; + break; + + case OPT_Wl_: + if( strstr(decoded_options[i].orig_option_with_args_text, + "--allow-multiple-definitions") ) + { + need_allow_multiple_definition = false; + } + if( strstr(decoded_options[i].orig_option_with_args_text, "-rpath") ) + { + // The caller is doing something with -rpath. Assume they know what + // they are doing + + // On second thought, always install our rpath. It goes at the end, + // so if the user specifies and rpath that they prefer, it'll get + // taken first. + need_rpath = true; + } + break; + + case OPT_nostdlib: + case OPT_nodefaultlibs: + case OPT_r: + case OPT_S: + case OPT_fsyntax_only: + case OPT_E: + // With these options, no libraries need be loaded + need_libgcobol = false; + need_libmath = false; + need_libdl = false; + need_libstdc = false; + // need_libquadmath = false; + need_rdynamic = false; + break; + + case OPT_static_libgcobol: +#ifdef HAVE_LD_STATIC_DYNAMIC + static_libgcobol = true; + need_libgcobol = true; +#endif + break; + + case OPT_l: + n_infiles += 1; + if(strcmp(decoded_options[i].arg, MATH_LIBRARY) == 0) + { + need_libmath = false; + } + else if(strcmp(decoded_options[i].arg, DL_LIBRARY) == 0) + { + need_libdl = false; + } + else if(strcmp(decoded_options[i].arg, COBOL_LIBRARY) == 0) + { + need_libgcobol = false; + } + else if(strcmp(decoded_options[i].arg, STDCPP_LIBRARY) == 0) + { + need_libstdc = false; + } + break; + + case OPT_o: + n_outfiles += 1; + break; + + case OPT_nomain: + saw_OPT_no_main = true; + break; + + case OPT_main: + case OPT_main_: + n_mains += 1; + break; + + case OPT_v: + no_files_error = false; + verbose = true; + break; + + case OPT_x: + language = decoded_options[i].arg; + break; + + case OPT__version: + no_files_error = false; + break; + + case OPT__help: + /* + * $ man ./gcobol.1 | ./help.gen + */ + puts( "Options specific to gcobol: " ); + puts( + " -main option uses the first PROGRAM of filename as the entry point for\n" + " the main() procedure. \n" + " -no_main \n" + " means that there is no -main, and the main() entry point is\n" + " provided by some other compilation or .o file\n" + " -findicator-column\n" + " describes the location of the Indicator Area in a COBOL file with\n" + " standard 80-column lines. \n" + " -ffixed-form\n" + " Use strict Reference Format in reading the COBOL input: 72-char‐\n" + " acter lines, with a 6-character sequence area, and an indicator\n" + " column. \n" + " -ffree-form\n" + " Force the COBOL input to be interpreted as free format. \n" + " -fmax-errors nerror\n" + " nerror represents the number of error messages produced. \n" + " -fflex-debug, -fyacc-debug\n" + " produce messages useful for compiler development. \n" ); + + + /* Let gcc.cc handle this, as it has a really + cool facility for handling --help and --verbose --help. */ + return; + + default: + break; + } + } + + if( saw_OPT_no_main && n_mains ) + { + char ach[] = "\"-no-main\" and \"-main\" are incompatible"; + fatal_error(input_location,"%s", ach); + } + + bool suppress_main = saw_OPT_no_main + || (saw_OPT_c && n_mains==0) + || saw_OPT_shared; + + if( no_files_error || ((n_outfiles != 0) && (n_infiles == 0)) ) + { + fatal_error(input_location, "no input files"); + } + + /* If there are no input files, there is no need for any libraries. */ + if( n_infiles == 0 ) + { + need_libgcobol = false; + need_libmath = false; + need_libdl = false; + need_libstdc = false; + // need_libquadmath = false; + } + + /* Second pass through arglist, transforming arguments as appropriate. */ + + append_arg(decoded_options[0]); /* Start with command name, of course. */ + + bool first_COBOL_file = true; + bool prior_main = false; + const char *entry_point = NULL; + + // Reset the current language, in case it was changed during the first pass + language = "none"; + + for(int i = 1; i < argc; ++i) + { + if (decoded_options[i].errors & CL_ERR_MISSING_ARG) + { + append_arg(decoded_options[i]); + continue; + } + + switch (decoded_options[i].opt_index) + { + case OPT_SPECIAL_input_file: + if( strcmp(language, "none") == 0 + || strcmp(language, "cobol") == 0 ) + { + // This is a COBOL source code file + if( !suppress_main && n_mains==0 && first_COBOL_file ) + { + // This is a case where the -c option is not present, and there + // were no -main switches. So, we are going to insert a -main switch + // in front of this, the first COBOL file + first_COBOL_file = false; + prior_main = true; + } + + if( prior_main ) + { + char ach[128]; + if( entry_point ) + { + strcpy(ach, entry_point); + } + else + { + strcpy(ach, decoded_options[i].arg); + } + append_option(OPT_main_, ach, 1); + prior_main = false; + entry_point = NULL; + } + } + append_arg(decoded_options[i]); + break; + + case OPT_main: + if( prior_main ) + { + char ach[] = "Multiple \"-main\" without a source file"; + fatal_error(input_location, "%s", ach); + } + // This is a simple -main that needs to be followed by a COBOL file + prior_main = true; + break; + + case OPT_main_: // Note the trailing underscore + if( prior_main ) + { + char ach[] = "Multiple \"-main\" without a source file"; + fatal_error(input_location, "%s", ach); + } + // This is -main= that needs to be followed by a COBOL file + entry_point = decoded_options[i].arg; + prior_main = true; + break; + + case OPT_nomain: + append_arg(decoded_options[i]); + break; + + case OPT_x: + language = decoded_options[i].arg; + append_arg(decoded_options[i]); + break; + + case OPT_static_libgcobol: + // Don't pass this one on to cobol1 + break; + +////#ifdef __x86_64__ +//// case OPT_m32: +//// error ( "unrecognized command-line option %<-%s%>; " +//// "(32-bit executables cannot be generated)", "m32"); +//// break; +////#endif + case OPT_static: + static_in_general = true; + break; + + default: + append_arg(decoded_options[i]); + break; + } + } + + /* As described above, we have empirically noticed that when the command line + explicitly specifies libgcobol.a as an input, a following -lgcobol causes + the "on exit" functions of the library to be executed twice. This can + cause trouble for c++ class destructors that expect to be run only once. + + So, we rather hamhandedly prevent the inclusion of the default -lgcobol + parameter when a libgcobol.a was found to be present. + + Note that if the user *explicitly* specifies both libgcobol.a and + -lgocobol, then he gets what he asked for, and the problem then belongs to + them. + + */ + + if( index_libgcobol_a ) + { + need_libgcobol = false; + } + + if( need_libgcobol ) + { + if( 0 != strcmp(EXEC_LIB, "/usr/lib") ) + { + append_option(OPT_L, EXEC_LIB, 1); + } + add_arg_lib(COBOL_LIBRARY, static_libgcobol); + } + if( need_libmath ) + { + add_arg_lib(MATH_LIBRARY, static_in_general); + } + if( need_libdl ) + { + add_arg_lib(DL_LIBRARY, static_in_general); + } + if( need_libstdc && static_in_general ) + { + add_arg_lib(STDCPP_LIBRARY, static_in_general); + } + + if( saw_OPT_shared && !saw_OPT_pic && !saw_OPT_PIC ) + { + append_fpic(); + } + + if( need_rdynamic ) + { + append_rdynamic(); + } + + if( need_allow_multiple_definition && (n_infiles || n_outfiles) ) + { + append_allow_multiple_definition(); + } + + if( need_rpath && (n_infiles || n_outfiles) ) + { + append_rpath(); + } + + if( prior_main ) + { + char ach[] = "\"-main\" without a source file"; + fatal_error(input_location, "%s", ach); + } + + // We now take the new_opt vector, and turn it into an array of + // cl_decoded_option + + size_t new_option_count = new_opt.size(); + struct cl_decoded_option *new_options = XNEWVEC (struct cl_decoded_option, new_option_count); + + for(size_t i=0; istatement_list_stack.back())) + +extern char *cobol_name_mangler(const char *cobol_name); +static tree gg_attribute_bit_get(struct cbl_field_t *var, cbl_field_attr_t bits); + +static tree label_list_out_goto; +static tree label_list_out_label; +static tree label_list_back_goto; +static tree label_list_back_label; + +static void hijack_for_development(const char *funcname); + +static size_t sv_data_name_counter = 1; +static int call_counter = 1; +static int pseudo_label = 1; + +static bool suppress_cobol_entry_point = false; +static char ach_cobol_entry_point[256] = ""; + +bool bSHOW_PARSE = getenv("SHOW_PARSE"); +bool show_parse_sol = true; +int show_parse_indent = 0; + +#define DEFAULT_LINE_NUMBER 2 + +#ifdef LINE_TICK +/* This code is used from time to time when sorting out why compilation + takes more time than expected */ +static void +line_tick() + { + using namespace std::chrono; + static high_resolution_clock::time_point t1 = high_resolution_clock::now(); + static high_resolution_clock::time_point t2; + int line_now = CURRENT_LINE_NUMBER; + static int line = 0; + if( (line_now / 10000) != (line / 10000) ) + { + line = line_now; + t2 = high_resolution_clock::now(); + duration time_span = duration_cast>(t2 - t1); + fprintf(stderr, "%6d %6.1lf\n", line, time_span.count()); + } + } +#else +#define line_tick() +#endif + +typedef struct TREEPLET + { + tree pfield; + tree offset; + tree length; + } TREEPLET; + +static +void +treeplet_fill_source(TREEPLET &treeplet, cbl_refer_t &refer) + { + treeplet.pfield = gg_get_address_of(refer.field->var_decl_node); + treeplet.offset = refer_offset_source(refer); + treeplet.length = refer_size_source(refer); + } + +tree file_static_variable(tree type, const char *v) + { + // This routine returns a reference to an already-defined file_static variable + // You need to know the type that was used for the definition. + return gg_declare_variable(type, v, NULL, vs_file_static); + } + +static void move_helper(tree size_error, // INT + cbl_refer_t destref, + cbl_refer_t sourceref, + TREEPLET &tsource, + cbl_round_t rounded, + bool check_for_error, + bool restore_on_error = false + ); + +// set using -f-trace-debug, defined in lang.opt +int f_trace_debug; + +// When doing WRITE statements, the IBM Language Reference and the ISO/IEC_2014 +// standard specify that when the ADVANCING clause is omitted, the default is +// AFTER ADVANCING 1 LINE. +// +// MicroFocus and GnuCOBOL state that the default is BEFORE ADVANCING 1 LINE +// +// During initial compiler development, we used Michael Coughlin's "Beginning +// COBOL For Programmers" textbook for source code examples, and it was clear +// from at least one sample program that his compiler used the Microfocus +// convention. For ease of development, we took on that same convention, but +// we provide here for a switch that changes that behavior: + +static bool auto_advance_is_AFTER_advancing = 0; + +/* This is a little complicated. In order to keep things general, we are + assuming that any function we call will be returning a 64-bit value. In + places where we know that not to be true, we'll have to do appropriate + casts. For example, main() returns an INT, as do functions that + return the default RETURN-CODE will have */ + +#define COBOL_FUNCTION_RETURN_TYPE SSIZE_T + +#define MAX_AFTERS 8 + +// These variables contol a little state machine. When a simple -main is in +// effect, the first program in the module becomes the target of a main() +// that we synthesize function. When -main=module:progid is in effect, we +// create a main() that calls progid. When active, progid is kept in +// the map main_strings. +static std::unordered_map main_strings; +static bool this_module_has_main = false; // sticky switch for the module +static bool next_program_is_main = false; // transient switch for the module +static char *main_entry_point = NULL; + +static bool static_call = true; +bool use_static_call( bool yn ) { return static_call = yn; } +static bool use_static_call() { return static_call; } + +// This global variable can be set upstream, like from a compiler +// command line switch. "1" for stdout, "2" for stderr, or "filename" + +const char *gv_trace_switch = NULL; + +// The environment variable wins over the command line +char const *bTRACE1 = NULL; +tree trace_handle; +tree trace_indent; +bool cursor_at_sol = true; + +static void +trace1_init() + { + static bool first_time = true; + if( first_time ) + { + first_time = false; + trace_handle = gg_define_variable(INT, "trace_handle", vs_static); + trace_indent = gg_define_variable(INT, "trace_indent", vs_static); + + bTRACE1 = getenv("TRACE1") ? getenv("TRACE1") : gv_trace_switch; + + if( bTRACE1 && strcmp(bTRACE1, "0") != 0 ) + { + if( strcmp(bTRACE1, "1") == 0 ) + { + gg_assign(trace_handle , integer_one_node); + } + else if( strcmp(bTRACE1, "2") == 0 ) + { + gg_assign(trace_handle , integer_two_node); + } + else + { + gg_assign(trace_handle , + gg_open(gg_string_literal(bTRACE1), + build_int_cst_type(INT, O_CREAT|O_WRONLY|O_TRUNC))); + } + } + else + { + // In case bTRACE1 pointed to an empty string + bTRACE1 = NULL; + } + } + } + +static void +create_cblc_string_variable(const char *var_name, const char *var_contents) + { + // This is a way of having the compiler communicate with GDB. I create a + // global const char[] string with a known name so that GDB can look for that + // variable and pick up its contents. + + // This probably should be in the .debug_info section, but for the moment I + // don't know how to do that, but I do know how to do this: + + tree array_of_characters = build_array_type_nelts(CHAR, strlen(var_contents)+1); + TYPE_NAME(array_of_characters) = get_identifier("cblc_string"); + tree constr = build_string(strlen(var_contents)+1, var_contents); + TREE_TYPE(constr) = array_of_characters; + TREE_STATIC(constr) = 1; + TREE_CONSTANT(constr) = 1; + tree entry_point = gg_declare_variable(array_of_characters, + var_name, + constr, + vs_external); + gg_define_from_declaration(entry_point); + } + +static void +build_main_that_calls_something(const char *something) + { + // This routine generates main(), which has as its body a call to "something". + // which is a call to a simple `extern int something(void)` routine. + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" main will call ") + SHOW_PARSE_TEXT(something) + SHOW_PARSE_END + } + + gg_set_current_line_number(DEFAULT_LINE_NUMBER); + + gg_define_function( INT, + "main", + INT, "argc", + build_pointer_type(CHAR_P), "argv", + NULL_TREE); + + // Pick up pointers to the input parameters: + // First is the INT which is the number of argv[] entries + tree argc = DECL_ARGUMENTS(current_function->function_decl); + // Second is the char **argv + tree argv = TREE_CHAIN(argc); // overall source length + + gg_call( VOID, + "__gg__stash_argc_argv", + argc, + argv, + NULL_TREE); + + // Call the top-level COBOL function. We know it has to return an INT, + // so we need to cast it from the SIZE_T that all COBOL are assumed + // to return: + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("main calls \"", something, "\"") + TRACE1_END + } + + // Let MODULE-NAME know that we were launched by a generated -main program + gg_call(VOID, + "__gg__module_name_push", + gg_string_literal("Mmain"), + NULL_TREE); + + char *psz = cobol_name_mangler(something); + gg_assign(var_decl_main_called, integer_one_node); + gg_return(gg_cast(INT, gg_call_expr( COBOL_FUNCTION_RETURN_TYPE, + psz, + argc, + argv, + NULL_TREE))); + strncpy(ach_cobol_entry_point, psz, sizeof(ach_cobol_entry_point)-1); + free(psz); + gg_finalize_function(); + } + +static std::unordered_mapgotos_labels; +#define LABEL_COUNT_OFFSET 100 + +static +tree +get_field_p(size_t index) + { + if(index) + { + cbl_field_t *field = cbl_field_of(symbol_at(index)); + + if( !field->var_decl_node ) + { + dbgmsg("%s (type: %s) improperly has a NULL var_decl_node", + field->name, + cbl_field_type_str(field->type)); + cbl_internal_error( + "Probable cause: it was referenced without being defined."); + } + + return gg_get_address_of(field->var_decl_node); + } + else + { + return gg_cast(cblc_field_p_type_node, null_pointer_node); + } + } + +static +char * +level_88_helper(size_t parent_capacity, + const cbl_domain_elem_t &elem, + size_t &returned_size) + { + // We return a MALLOCed return value, which the caller must free. + char *retval = (char *)xmalloc(parent_capacity + 64); + char *builder = (char *)xmalloc(parent_capacity + 64); + size_t nbuild = 0; + + cbl_figconst_t figconst = cbl_figconst_of( elem.name()); + if( figconst ) + { + nbuild = 1; + strcpy(retval, "1Fx"); + switch(figconst) + { + case normal_value_e : + // This really should never happend + abort(); + break; + case low_value_e : + retval[2] = 'L'; + break; + case zero_value_e : + retval[2] = 'Z'; + break; + case space_value_e : + retval[2] = 'S'; + break; + case quote_value_e : + retval[2] = 'Q'; + break; + case high_value_e : + retval[2] = 'H'; + break; + case null_value_e: + retval[2] = '\0'; + break; + } + returned_size = 3; + } + else + { + // We are working with an ordinary string. + + // Pick up the string + size_t first_name_length = elem.size(); + char *first_name = (char *)xmalloc(first_name_length + 1); + memcpy(first_name, elem.name(), first_name_length); + first_name[first_name_length] = '\0'; + + // Convert it to EBCDIC, when necessary; leave it alone when not necessary. + for(size_t i=0; itype != FldClass || var->level != 88 ) + { + returned_size = 0; + return NULL; + } + + // Entering here means we know that this is FldClass of level 88 + + // We convert the incoming information at var->data.domains to a single + // stream of bytes. We return a malloced pointer to that stream; returned + // size is the size of the stream. + + // The nature of an 88 is that each element is a pair + + // The following pairs are zero-terminated strings. It thus + // follows that the strings cannot contain '\0' characters. + + // Each element of the pair is converted to a stream: + // For strings of bytes: + // ddd A + // For figurative constants: + // 1Fx, where x is in [LZSQH], for LOW-VALUE ZERO SPACE QUOTE HIGH-VALUE + + // Numerics are converted to strings, and handled as above + + size_t retval_capacity = 64; + char *retval = (char *)xmalloc(retval_capacity); + size_t output_index = 0; + + // Loop through the provided domains: + returned_size = 0; + const struct cbl_domain_t *domain = var->data.domain; + while( domain->first.name() ) + { + // We have another pair to process + size_t stream_len; + char *stream; + + // Do the first element of the domain + stream = level_88_helper(parent_capacity, domain->first, stream_len); + if( output_index + stream_len > retval_capacity ) + { + retval_capacity *= 2; + retval = (char *)xrealloc(retval, retval_capacity); + } + memcpy(retval + output_index, stream, stream_len); + output_index += stream_len; + returned_size += stream_len; + free(stream); + + // Do the second element of the domain + stream = level_88_helper(parent_capacity, domain->last, stream_len); + if( output_index + stream_len > retval_capacity ) + { + retval_capacity *= 2; + retval = (char *)xrealloc(retval, retval_capacity); + } + memcpy(retval + output_index, stream, stream_len); + output_index += stream_len; + returned_size += stream_len; + free(stream); + domain += 1; + } + retval[returned_size++] = '\0'; + return retval; + } + +static +char * +get_class_condition_string(cbl_field_t *var) + { + // We know at this point that var is FldClass + // The LEVEL is not 88, so this is a CLASS SPECIAL-NAME + + const struct cbl_domain_t *domain = var->data.domain; + + /* There are five possibilities we need to deal with. + + 66 + 66 THROUGH 91 + 91 THROUGH 66 // This is the same as 66 THROUGH 91 + "A" + "A" THROUGH "Z + "Z" THROUGH "A" // This is the same as "A" THROUGH "Z" + "ABCJ12" // This is the same as "A" "B" "C" ... + + Expressly presented numbers are the ordinal positions in the run-time + character set. So, an ASCII "A" would be given as 66, which is one + greater than 65, which is the ASCII codepoint for "A". An EBCDIC "A" + would be presented as 194, which is one greater than 193, which is the + decimal representation of an EBCDIC "A", whose hex code is 0xC2. + + We need to account for EBCDIC as well as ASCII. In EBCDIC, + "A" THROUGH "Z" doesn't mean what it looks like it means, because EBCIDC + encoding has gaps between I and J, and between R and S. That isn't true + in ASCII. We don't want to deal with these issues at compile time, so we + are encoding numeric ordinals with their negated values, while other + characters are given as the numeric forms of their ASCII encoding. + Conversion to EBCDIC occurs at runtime. + + In support of this strategy, character strings like "ABCD" are broken up + into "A" "B" "C" "D" and converted to their hexadecimal representations. + */ + + char ach[8192]; + memset(ach, 0, sizeof(ach)); + char *p = ach; + + while( domain->first.is_numeric || domain->first.name() ) + { + // *What* were they smoking back then? + + uint8_t value1; + uint8_t value2; + + char achFirstName[256]; + char achLastName[256]; + + size_t first_name_length = domain->first.size() + ? domain->first.size() + : strlen(domain->first.name()); + size_t last_name_length = domain->last.size() + ? domain->last.size() + : strlen(domain->last.name()); + + if( domain->first.is_numeric ) + { + if( strlen(ach) > sizeof(ach) - 1000 ) + { + cbl_internal_error("Nice try, but you can't fire me. I quit!"); + } + + // We are working with unquoted strings that contain the values 1 through + // 256: + value1 = (uint8_t)atoi(domain->first.name()); + value2 = (uint8_t)atoi(domain->last.name()); + if( value2 < value1 ) + { + std::swap(value1, value2); + } + if( value1 != value2 ) + { + p += sprintf(p, "-%2.2X/-%2.2X ", value1-1, value2-1); + } + else + { + p += sprintf(p, "-%2.2X ", value1-1); + } + } + else if( first_name_length == 1 ) + { + // Since the first.name is a single character, we can do this as + // a single-character pair. + + // Keep in mind that the single character might be a two-byte UTF-8 + // codepoint + uint8_t ch1 = domain->first.name()[0]; + uint8_t ch2 = domain->last.name()[0]; + + gcc_assert(first_name_length <= 2); + gcc_assert(last_name_length <= 2); + + char *p2; + size_t one; + p2 = achFirstName; + one = 8; + raw_to_internal(&p2, &one, domain->last.name(), last_name_length); + ch2 = achFirstName[0]; + + p2 = achLastName; + one = 8; + raw_to_internal(&p2, &one, domain->first.name(), first_name_length); + ch1 = achLastName[0]; + + if( ch1 < ch2 ) + { + value1 = ch1; + value2 = ch2; + } + else + { + value2 = ch1; + value1 = ch2; + } + if( value1 != value2 ) + { + p += sprintf(p, "%2.2X/%2.2X ", value1, value2); + } + else + { + p += sprintf(p, "%2.2X ", value1); + } + } + else + { + gcc_assert( first_name_length > 1 ); + + // We are working with a string larger than 1 character. The COBOL + // spec says there can't be a THROUGH, so we ignore the last.name: + char *p2; + size_t one; + p2 = achFirstName; + one = 8; + raw_to_internal(&p2, &one, domain->last.name(), last_name_length); + + for(size_t i=0; inode == that.node; + } + }; +}; + +static std::map > call_targets; +static std::map called_targets; + +static void +parser_call_target( tree func ) + { + cbl_call_convention_t convention = current_call_convention(); + const char *name = IDENTIFIER_POINTER( DECL_NAME(func) ); + program_reference_t key(current_program_index(), name); + + // Each func is unique and inserted only once. + assert( called_targets.find(func) == called_targets.end() ); + called_targets[func] = convention; + + called_tree_t value(func, convention); + auto& p = call_targets[key]; + p.push_back(value); + } + +/* + * Is the node a recorded call target? The language-dependent + * function cobol_set_decl_assembler_name will lower-case the name + * unless, for a specific call, this function returns + * cbl_call_verbatim_e. + */ +cbl_call_convention_t +parser_call_target_convention( tree func ) + { + auto p = called_targets.find(func); + if( p != called_targets.end() ) return p->second; + + return cbl_call_cobol_e; + } + +void +parser_call_targets_dump() + { + dbgmsg( "call targets for #%zu", current_program_index() ); + for( const auto& elem : call_targets ) { + const auto& k = elem.first; + const auto& v = elem.second; + fprintf(stderr, "\t#%-3zu %s calls %s ", + k.caller, cbl_label_of(symbol_at(k.caller))->name, k.called); + char ch = '['; + for( auto func : v ) { + fprintf( stderr, "%c %s", ch, IDENTIFIER_POINTER(DECL_NAME(func.node)) ); + ch = ','; + } + fprintf(stderr, " ]\n"); + } + } + +size_t +parser_call_target_update( size_t caller, + const char plain_name[], + const char mangled_name[] ) + { + auto key = program_reference_t(caller, plain_name); + auto p = call_targets.find(key); + if( p == call_targets.end() ) return 0; + + for( auto func : p->second ) + { + func.convention = cbl_call_verbatim_e; + DECL_NAME(func.node) = get_identifier(mangled_name); + } + return p->second.size(); + } + +static tree +function_handle_from_name(cbl_refer_t &name, + tree function_return_type) + { + Analyze(); + + tree function_type = build_varargs_function_type_array( + function_return_type, + 0, + NULL); + tree function_pointer = build_pointer_type(function_type); + tree function_handle = gg_define_variable(function_pointer, "..function_handle.1", vs_stack); + + if( name.field->type == FldPointer ) + { + // If the parameter is a pointer, just pick up the value and head for the + // exit + if( refer_is_clean(name) ) + { + gg_memcpy(gg_get_address_of(function_handle), + member(name.field->var_decl_node, "data"), + build_int_cst_type(SIZE_T, sizeof(void *))); + } + else + { + gg_memcpy(gg_get_address_of(function_handle), + qualified_data_source(name), + build_int_cst_type(SIZE_T, sizeof(void *))); + } + return function_handle; + } + else if( use_static_call() && is_literal(name.field) ) + { + // It's a literal, and we are using static calls. Generate the CALL, and + // pass the address expression to parser_call_target(). That will cause + // parser_call_target_update() to replace any nested CALL "foo" with the + // local "foo.60" name. + + // We create a reference to it, which is later resolved by the linker. + tree addr_expr = gg_get_function_address( function_return_type, + name.field->data.initial); + gg_assign(function_handle, addr_expr); + + tree func = TREE_OPERAND(addr_expr, 0); + parser_call_target(func); // add function to list of call targets + } + else + { + // This is not a literal or static + if( name.field->type == FldLiteralA ) + { + gg_assign(function_handle, + gg_cast(build_pointer_type(function_type), + gg_call_expr(VOID_P, + "__gg__function_handle_from_literal", + build_int_cst_type(INT, current_function->our_symbol_table_index), + gg_string_literal(name.field->data.initial), + NULL_TREE))); + } + else + { + gg_assign(function_handle, + gg_cast(build_pointer_type(function_type), + gg_call_expr( VOID_P, + "__gg__function_handle_from_name", + build_int_cst_type(INT, current_function->our_symbol_table_index), + gg_get_address_of(name.field->var_decl_node), + refer_offset_source(name), + refer_size_source( name), + NULL_TREE))); + } + } + + return function_handle; + } + +void +parser_initialize_programs(size_t nprogs, struct cbl_refer_t *progs) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + for( size_t i=0; i 0 ) + { + SHOW_PARSE_INDENT + } + if( progs[i].field->type == FldLiteralA ) + { + SHOW_PARSE_TEXT("\"") + SHOW_PARSE_TEXT(progs[i].field->data.initial) + SHOW_PARSE_TEXT("\"") + } + else + { + SHOW_PARSE_TEXT("") + SHOW_PARSE_TEXT(progs[i].field->name) + } + } + SHOW_PARSE_END + } + + for( size_t i=0; i ... TEST AFTER ... UNTIL ... + gg_set_current_line_number(CURRENT_LINE_NUMBER-1); + gg_assign(var_decl_nop, build_int_cst_type(INT, 106)); + } + + gg_set_current_line_number(CURRENT_LINE_NUMBER); + } + +static void +initialize_variable_internal( cbl_refer_t refer, + bool explicitly=false, + bool just_once=false) + { + // fprintf(stderr, "initialize_variable_internal for %s\n", refer.field->name); + // gg_printf("initialize_variable_internal for %s\n", + // gg_string_literal(refer.field->name), + // NULL_TREE); + cbl_field_t *parsed_var = refer.field; + + if( parsed_var->type == FldLiteralA ) + { + return; + } + + if( parsed_var->is_key_name() ) + { + // This field is actually a placeholder for a RECORD KEY alias. It didn't + // go through parser_symbol_add(), and so any attempt to initialize it + // results in an error because there is no var_decl_node. + return; + } + + if( is_register_field( parsed_var) ) + { + return; + } + + if( parsed_var && parsed_var->type == FldBlob ) + { + return; + } + + Analyze(); + SHOW_PARSE + { + do + { + fprintf( stderr, + "( %d ) %s():", + CURRENT_LINE_NUMBER, + __func__); + } + while(0); + SHOW_PARSE_REF(" ", refer); + if( parsed_var->data.initial ) + { + SHOW_PARSE_TEXT(" >>") + if( parsed_var->level == 88) + { + size_t returned_size = 0; + char *string88 = get_level_88_domain(0, parsed_var, returned_size); + + char *p = string88; + bool first = true; + while(*p) + { + char *pend; + size_t length1 = strtoull(p, &pend, 10); + char *string1 = pend + 1; + char flag = *pend; + p = string1 + length1; + if(flag == 'A' ) + { + char ach2[] = "x"; + SHOW_PARSE_TEXT("\"") + for(size_t i=0; itype == FldClass ) + { + char *p = get_class_condition_string(parsed_var); + SHOW_PARSE_TEXT(p); + free(p); + } + else + { + switch(parsed_var->type) + { + case FldGroup: + case FldAlphanumeric: + case FldNumericEdited: + case FldAlphaEdited: + case FldLiteralA: + SHOW_PARSE_TEXT(parsed_var->data.initial); + break; + default: + { + char ach[128]; + strfromf128(ach, sizeof(ach), "%.16E", parsed_var->data.value); + SHOW_PARSE_TEXT(ach); + break; + } + } + + } + SHOW_PARSE_TEXT("<<") + } + SHOW_PARSE_END + } + + CHECK_FIELD(parsed_var); + + // When initializing a variable, we have to ignore any DEPENDING ON clause + // that might otherwise apply + suppress_dest_depends = true; + + bool is_redefined = false; + + cbl_field_t *family_tree = parsed_var; + while(family_tree) + { + if( symbol_redefines(family_tree) ) + { + is_redefined = true; + break; + } + + family_tree = parent_of(family_tree); + } + + if( parsed_var->level == 66 ) + { + // Treat RENAMES as if they are redefines: + is_redefined = true; + } + + if( parsed_var->data.initial ) + { + bool a_parent_initialized = false; + cbl_field_t *parent = parent_of(parsed_var); + while( parent ) + { + if( parent->attr & has_value_e ) + { + a_parent_initialized = true; + break; + } + parent = parent_of(parent); + } + if( !a_parent_initialized ) + { + parsed_var->attr |= has_value_e; + } + } + + static const int DEFAULT_BYTE_MASK = 0x00000000FF; + static const int NSUBSCRIPT_MASK = 0x0000000F00; + static const int NSUBSCRIPT_SHIFT = 8; + static const int DEFAULTBYTE_BIT = 0x0000001000; + static const int EXPLICIT_BIT = 0x0000002000; + static const int REDEFINED_BIT = 0x0000004000; + static const int JUST_ONCE_BIT = 0x0000008000; + + int flag_bits = 0; + flag_bits |= explicitly ? EXPLICIT_BIT : 0; + flag_bits |= is_redefined && !explicitly ? REDEFINED_BIT : 0 ; + flag_bits |= wsclear() + ? DEFAULTBYTE_BIT + (*wsclear() & DEFAULT_BYTE_MASK) + : 0; + flag_bits |= (refer.nsubscript << NSUBSCRIPT_SHIFT) & NSUBSCRIPT_MASK; + flag_bits |= just_once ? JUST_ONCE_BIT : 0 ; + + suppress_dest_depends = false; // Set this to false so that refer_is_clean is valid + //fprintf(stderr, "refer_is_clean %2.2d %s %d 0x%lx\n", refer.field->level, refer.field->name, refer_is_clean(refer), refer.field->attr); + + if( !refer_is_clean(refer) ) + { + gg_call(VOID, + "__gg__initialize_variable", + gg_get_address_of(refer.field->var_decl_node), + refer_offset_dest(refer), + build_int_cst_type(INT, flag_bits), + NULL_TREE); + } + else + { + // We have a clean refer with no mods, so we can send just the pointer to + // the field + gg_call(VOID, + "__gg__initialize_variable_clean", + gg_get_address_of(refer.field->var_decl_node), + build_int_cst_type(INT, flag_bits) , + NULL_TREE); + } + + suppress_dest_depends = true; + + TRACE1 + { + TRACE1_HEADER + if( refer.field->level ) + { + gg_fprintf( trace_handle, + 1, "%2.2d ", + build_int_cst_type(INT, refer.field->level)); + } + TRACE1_REFER_INFO("", refer) + if( refer.field->level == 88 ) + { + TRACE1_TEXT(" ["); + + size_t returned_size = 0; + char *string88 = get_level_88_domain(0, parsed_var, returned_size); + + char *p = string88; + bool first = true; + while(*p) + { + char *pend; + size_t length1 = strtoull(p, &pend, 10); + char *string1 = pend + 1; + char flag = *pend; + p = string1 + length1; + if( flag == 'A' ) + { + char ach2[] = "x"; + TRACE1_TEXT("\"") + for(size_t i=0; itype == FldClass ) + { + char *p = get_class_condition_string(parsed_var); + TRACE1_TEXT(p); + free(p); + } + else + { + TRACE1_FIELD_VALUE("", parsed_var, "") + } + TRACE1_END + } + suppress_dest_depends = false; + } + +//static void +//initialize_variable_internal( cbl_field_t *field, +// bool explicitly=false, +// bool just_once=false) +// { +// cbl_refer_t wrapper(field); +// initialize_variable_internal( wrapper, +// explicitly, +// just_once); +// } + +void +parser_initialize(cbl_refer_t refer, bool like_parser_symbol_add) + { + //gg_printf("parser_initialize %s\n", gg_string_literal(refer.field->name), NULL_TREE); + if( like_parser_symbol_add ) + { + initialize_variable_internal(refer); + } + else + { + gcc_assert(refer.field->data.initial); + static const bool explicitly = true; + initialize_variable_internal(refer, explicitly); + } + } + +static void +get_binary_value_from_float(tree value, + cbl_refer_t &dest, + cbl_field_t *source, + tree source_offset + ) + { + // The destination is something with rdigits; the source is FldFloat + tree ftype; + switch( source->data.capacity ) + { + case 4: + ftype = FLOAT; + break; + case 8: + ftype = DOUBLE; + break; + case 16: + ftype = FLOAT128; + break; + default: + gcc_unreachable(); + break; + } + tree fvalue = gg_define_variable(ftype); + gg_assign(fvalue, + gg_indirect(gg_cast(build_pointer_type(ftype), + gg_add( member(source->var_decl_node,"data"), + source_offset)))); + + // We need to convert the floating point value to an integer value with the + // rdigits lined up properly. + + int rdigits = get_scaled_rdigits( dest.field ); + gg_assign(fvalue, + gg_multiply(fvalue, + gg_float(ftype, + build_int_cst_type(INT, + get_power_of_ten(rdigits))))); + + // And we need to throw away any digits to the left of the leftmost digits: + // At least, we need to do so in principl. I am deferring this problem until + // I understand it better. + + // We now have a floating point value that has been multiplied by 10**rdigits + gg_assign(value, gg_trunc(TREE_TYPE(value), fvalue)); + } + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +static void +gg_attribute_bit_clear(struct cbl_field_t *var, cbl_field_attr_t bits) + { + gg_assign( member(var, "attr"), + gg_bitwise_and( member(var, "attr"), + gg_bitwise_not( build_int_cst_type(SIZE_T, bits) ))); + } + +static +tree +gg_attribute_bit_get(struct cbl_field_t *var, cbl_field_attr_t bits) + { + tree retval = gg_bitwise_and( member(var, "attr"), + build_int_cst_type(SIZE_T, bits) ); + return retval; + } + +static void +gg_attribute_bit_set(struct cbl_field_t *var, cbl_field_attr_t bits) + { + gg_assign( member(var, "attr"), + gg_bitwise_or(member(var, "attr"), + build_int_cst_type(SIZE_T, bits))); + } +#pragma GCC diagnostic pop + +static void +gg_default_qualification(struct cbl_field_t * /*var*/) + { +// gg_attribute_bit_clear(var, refmod_e); + } + +static void +gg_get_depending_on_value(tree depending_on, cbl_field_t *current_sizer) + { + // We have to deal with the possibility of a DEPENDING_ON variable, + // and we have to apply array bounds whether or not there is a DEPENDING_ON + // variable: + + tree occurs_lower = gg_define_variable(LONG, "_lower"); + tree occurs_upper = gg_define_variable(LONG, "_upper"); + + gg_assign(occurs_lower, build_int_cst_type(LONG, current_sizer->occurs.bounds.lower)); + gg_assign(occurs_upper, build_int_cst_type(LONG, current_sizer->occurs.bounds.upper)); + + if( current_sizer->occurs.depending_on ) + { + // Get the current value of the depending_on data-item: + tree value = gg_define_int128(); + get_binary_value( value, + NULL, + cbl_field_of(symbol_at(current_sizer->occurs.depending_on)), + size_t_zero_node); + gg_assign(depending_on, gg_cast(LONG, value)); + IF( depending_on, lt_op, occurs_lower ) + // depending_is can be no less than occurs_lower: + gg_assign(depending_on, occurs_lower ); + ELSE + ENDIF + IF( depending_on, gt_op, occurs_upper ) + // depending_is can be no greater than occurs_upper: + gg_assign(depending_on, occurs_upper ); + ELSE + ENDIF + } + else + { + gg_assign(depending_on, occurs_upper); + } + } + +static int +digits_to_bytes(int digits) + { + int retval; + if( digits <= 2 ) + { + retval = 1; + } + else if( digits <= 4 ) + { + retval = 2; + } + else if( digits <= 9 ) + { + retval = 4; + } + else if( digits <= 18 ) + { + retval = 8; + } + else + { + retval = 16; + } + return retval; + } + +static size_t +get_bytes_needed(cbl_field_t *field) + { + size_t retval = 0; + switch(field->type) + { + case FldIndex: + case FldPointer: + case FldFloat: + case FldLiteralN: + retval = field->data.capacity; + break; + + case FldNumericDisplay: + { + int digits; + if( field->attr & scaled_e && field->data.rdigits<0) + { + digits = field->data.digits + -field->data.rdigits; + } + else + { + digits = field->data.digits; + } + retval = digits_to_bytes(digits); + break; + } + + case FldPacked: + { + int digits; + if( field->attr & scaled_e && field->data.rdigits<0) + { + digits = field->data.digits + -field->data.rdigits; + } + else + { + digits = field->data.digits; + } + if( !(field->attr & separate_e) ) + { + // This is COMP-3, so there is a sign nybble. + digits += 1; + } + retval = (digits+1)/2; + break; + } + + case FldNumericBinary: + case FldNumericBin5: + { + if( field->data.digits ) + { + int digits; + if( field->attr & scaled_e && field->data.rdigits<0) + { + digits = field->data.digits + -field->data.rdigits; + } + else + { + digits = field->data.digits; + } + retval = digits_to_bytes(digits); + } + else + { + retval = field->data.capacity; + } + break; + } + + default: + cbl_internal_error("%s(): Knows not the variable type %s for %s", + __func__, + cbl_field_type_str(field->type), + field->name ); + break; + } + return retval; + } + +static void +normal_normal_compare(bool debugging, + tree return_int, + cbl_refer_t *left_side_ref, + cbl_refer_t *right_side_ref, + tree left_side, + tree right_side ) + { + Analyze(); + + // If a value is intermediate_e, then the rdigits can vary at run-time, so + // we can't rely on the compile-time rdigits. + + bool left_intermediate = (left_side_ref->field->attr & intermediate_e); + bool right_intermediate = (right_side_ref->field->attr & intermediate_e); + + if( debugging ) + { + gg_printf("normal_normal_compare(): left_intermediate/right_intermediate %d/%d\n", + left_intermediate ? integer_one_node : integer_zero_node , + right_intermediate ? integer_one_node : integer_zero_node , + NULL_TREE); + } + + bool needs_adjusting; + if( !left_intermediate && !right_intermediate ) + { + // Yay! Both sides have fixed rdigit values. + + // Flag needs_adjusting as false, because we are going to do it here: + needs_adjusting = false; + int adjust = get_scaled_rdigits(left_side_ref->field) + - get_scaled_rdigits(right_side_ref->field); + + if( adjust > 0 ) + { + // We need to make right_side bigger to match the scale of left_side + scale_by_power_of_ten_N(right_side, adjust); + } + else if( adjust < 0 ) + { + // We need to make left_side bigger to match the scale of right_side + scale_by_power_of_ten_N(left_side, -adjust); + } + } + else + { + // At least one side is right_intermediate + + tree adjust; + if( !left_intermediate && right_intermediate ) + { + // left is fixed, right is intermediate + adjust = gg_define_int(); + gg_assign(adjust, + build_int_cst_type( INT, + get_scaled_rdigits(left_side_ref->field))); + + gg_assign(adjust, + gg_subtract(adjust, + gg_cast(INT, + member(right_side_ref->field->var_decl_node, + "rdigits")))); + needs_adjusting = true; + } + else if( left_intermediate && !right_intermediate ) + { + // left is intermediate, right is fixed + adjust = gg_define_int(); + gg_assign(adjust, gg_cast(INT, member(left_side_ref->field, "rdigits"))); + gg_assign(adjust, + gg_subtract(adjust, + build_int_cst_type( INT, + get_scaled_rdigits(right_side_ref->field)))); + needs_adjusting = true; + } + else // if( left_intermediate && right_intermediate ) + { + // Both sides are intermediate_e + adjust = gg_define_int(); + gg_assign(adjust, gg_cast(INT, member(left_side_ref->field, "rdigits"))); + gg_assign(adjust, + gg_subtract(adjust, + gg_cast(INT, + member(right_side_ref->field, "rdigits")))); + needs_adjusting = true; + } + + if( needs_adjusting ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): The value of adjust is %d\n", + adjust, + NULL_TREE); + } + IF( adjust, gt_op, integer_zero_node ) + { + // The right side needs to be scaled up + scale_by_power_of_ten(right_side, adjust); + } + ELSE + { + IF( adjust, lt_op, integer_zero_node ) + { + // The left side needs to be scaled up + scale_by_power_of_ten(left_side, gg_negate(adjust)); + } + ELSE + ENDIF + } + ENDIF + } + } + + if( TREE_TYPE(left_side) != TREE_TYPE(right_side) ) + { + // One is signed, the other isn't: + if( left_side_ref->field->attr & signable_e ) + { + // The left side can be negative. If it is, the return value has to be + // -1 for left < right + IF( left_side, lt_op, gg_cast(TREE_TYPE(left_side), integer_zero_node) ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): different types returning -1\n", + NULL_TREE); + } + gg_assign( return_int, integer_minusone_node); + } + ELSE + { + // Both sides are positive, allowing a direct comparison. + IF( gg_cast(TREE_TYPE(right_side), left_side), lt_op, right_side ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning -1\n", NULL_TREE); + } + gg_assign( return_int, integer_minusone_node); + } + ELSE + { + IF( gg_cast(TREE_TYPE(right_side), left_side), gt_op, right_side) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning +1\n", NULL_TREE); + } + gg_assign( return_int, integer_one_node); + } + ELSE + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning zero\n", NULL_TREE); + } + gg_assign( return_int, integer_zero_node); + } + ENDIF + } + ENDIF + } + ENDIF + } + else + { + // The right side can be negative. If it is, the return value has to be + // +1 for left > right + IF( right_side, lt_op, gg_cast(TREE_TYPE(right_side), integer_zero_node) ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): different types returning +1\n", NULL_TREE); + } + gg_assign( return_int, integer_one_node); + } + ELSE + { + // Both sides are positive, allowing a direct comparison. + IF( left_side, lt_op, gg_cast(TREE_TYPE(left_side), right_side) ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning -1\n", NULL_TREE); + } + gg_assign( return_int, integer_minusone_node); + } + ELSE + { + IF( left_side, gt_op, gg_cast(TREE_TYPE(left_side), right_side) ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning +1\n", NULL_TREE); + } + gg_assign( return_int, integer_one_node); + } + ELSE + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning zero\n", NULL_TREE); + } + gg_assign( return_int, integer_zero_node); + } + ENDIF + } + ENDIF + } + ENDIF + } + } + else + { + // Both sides are the same type, allowing a direct comparison. + IF( left_side, lt_op, right_side ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning -1\n", NULL_TREE); + } + gg_assign( return_int, integer_minusone_node); + } + ELSE + { + IF( left_side, gt_op, right_side ) + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning +1\n", NULL_TREE); + } + gg_assign( return_int, integer_one_node); + } + ELSE + { + if( debugging ) + { + gg_printf("normal_normal_compare(): returning zero\n", NULL_TREE); + } + gg_assign( return_int, integer_zero_node); + } + ENDIF + } + ENDIF + } + } + +static void +compare_binary_binary(tree return_int, + cbl_refer_t *left_side_ref, + cbl_refer_t *right_side_ref ) + { + Analyze(); + static const bool debugging = false; + + // We know the two sides have binary values that can be extracted. + tree left_side; + tree right_side; + + // Use SIZE128 when we need two 64-bit registers to hold the value. All + // others fit into 64-bit LONG with pretty much the same efficiency. + + size_t left_bytes_needed = get_bytes_needed(left_side_ref->field); + size_t right_bytes_needed = get_bytes_needed(right_side_ref->field); + + if( left_bytes_needed >= SIZE128 + || right_bytes_needed >= SIZE128 ) + { + if( debugging ) + { + gg_printf("compare_binary_binary(): using int128\n", NULL_TREE); + } + + left_side = gg_define_int128(); + right_side = gg_define_int128(); + } + else + { + if( debugging ) + { + gg_printf("compare_binary_binary(): using int64\n", NULL_TREE); + } + left_side = gg_define_variable( left_side_ref->field->attr & signable_e ? LONG : ULONG ); + right_side = gg_define_variable(right_side_ref->field->attr & signable_e ? LONG : ULONG ); + } + + //tree dummy = gg_define_int(); + static tree hilo_left = gg_define_variable(INT, "..cbb_hilo_left", vs_file_static); + static tree hilo_right = gg_define_variable(INT, "..cbb_hilo_right", vs_file_static); + + get_binary_value(left_side, + NULL, + left_side_ref->field, + refer_offset_source(*left_side_ref), + hilo_left); + get_binary_value(right_side, + NULL, + right_side_ref->field, + refer_offset_source(*right_side_ref), + hilo_right); + IF( hilo_left, eq_op, integer_one_node ) + { + // left side is hi-value + IF( hilo_right, eq_op, integer_one_node ) + { + if( debugging ) + { + gg_printf("compare_binary_binary(): left and right are HIGH-VALUE\n", NULL_TREE); + } + gg_assign(return_int, integer_zero_node); + } + ELSE + { + if( debugging ) + { + gg_printf("compare_binary_binary(): left is HIGH-VALUE\n", NULL_TREE); + } + gg_assign(return_int, integer_one_node); + } + ENDIF + } + ELSE + { + // left is not HIGH-VALUE: + IF( hilo_left, eq_op, integer_minus_one_node ) + { + // left side is LOW-VALUE + IF( hilo_right, eq_op, integer_minus_one_node ) + { + if( debugging ) + { + gg_printf("compare_binary_binary(): left and right are LOW-VALUE\n", NULL_TREE); + } + gg_assign(return_int, integer_zero_node); + } + ELSE + { + // Right side is not low-value + if( debugging ) + { + gg_printf("compare_binary_binary(): left is LOW-VALUE\n", NULL_TREE); + } + gg_assign(return_int, integer_one_node); + } + ENDIF + } + ELSE + { + // Left side is normal + IF( hilo_right, eq_op, integer_one_node ) + { + if( debugging ) + { + gg_printf("compare_binary_binary(): right is HIGH-VALUE\n", NULL_TREE); + } + gg_assign(return_int, integer_minus_one_node); + } + ELSE + { + IF( hilo_right, eq_op, integer_minus_one_node ) + { + if( debugging ) + { + gg_printf("compare_binary_binary(): right is LOW-VALUE\n", NULL_TREE); + } + gg_assign(return_int, integer_one_node); + } + ELSE + { + if( debugging ) + { + gg_printf("compare_binary_binary(): left and right are normal\n", NULL_TREE); + } + normal_normal_compare(debugging, + return_int, + left_side_ref, + right_side_ref, + left_side, + right_side + ); + } + ENDIF + } + ENDIF + } + ENDIF + } + ENDIF + } + +#define DEBUG_COMPARE + +static void +cobol_compare( tree return_int, + cbl_refer_t &left_side_ref, + cbl_refer_t &right_side_ref ) + { + Analyze(); +// gg_printf("cobol_compare %s %s \"%s\" \"%s\"\n", + // gg_string_literal(left_side_ref.field->name), + // gg_string_literal(right_side_ref.field->name), + // member(left_side_ref.field, "data"), + // gg_string_literal(right_side_ref.field->data.initial), + // NULL_TREE); + + CHECK_FIELD(left_side_ref.field); + CHECK_FIELD(right_side_ref.field); + // This routine is in support of conditionals in the COBOL program. + // It takes two arbitrary COBOL variables from the parser and compares them + // according to a nightmarish set of rules. + + // See ISO/IEC 1989:2014(E) section 8.8.4.1.1 (page 153) + + // The return_int value is -1 when left_side < right_side + // 0 left_side == right_side + // 1 left_side > right_side + + bool compared = false; + + // In the effort to convert to in-line GIMPLE comparisons, I became flummoxed + // by comparisons involving REFMODs. This will have to be revisited, but for + // now I decided to keep using the libgcobol code, which according to NIST + // works properly. + + if( !left_side_ref.refmod.from + && !left_side_ref.refmod.len + && !right_side_ref.refmod.from + && !right_side_ref.refmod.len ) + { + cbl_refer_t *lefty = &left_side_ref; + cbl_refer_t *righty = &right_side_ref; + + int ntries = 1; + while( ntries <= 2 ) + { + switch( lefty->field->type ) + { + case FldLiteralN: + { + switch( righty->field->type ) + { + case FldLiteralN: + case FldNumericBinary: + case FldNumericBin5: + case FldPacked: + case FldNumericDisplay: + case FldIndex: + compare_binary_binary(return_int, lefty, righty); + compared = true; + break; + + case FldGroup: + case FldAlphanumeric: + case FldLiteralA: + { + // Comparing a FldLiteralN to an alphanumeric + // It is the case that data.initial is in the original form seen + // in the source code, which means that even in EBCDIC mode the + // characters are in the "ASCII" state. + + static size_t buffer_size = 0; + static char *buffer = NULL; + raw_to_internal(&buffer, + &buffer_size, + lefty->field->data.initial, + strlen(lefty->field->data.initial)); + + gg_assign( return_int, gg_call_expr( + INT, + "__gg__literaln_alpha_compare", + gg_string_literal(buffer), + gg_get_address_of(righty->field->var_decl_node), + refer_offset_source(*righty), + refer_size_source( *righty), + build_int_cst_type(INT, + (righty->all ? REFER_T_MOVE_ALL : 0)), + NULL_TREE)); + compared = true; + break; + } + + default: + break; + } + break; + } + + case FldNumericBin5: + case FldNumericBinary: + case FldPacked: + case FldNumericDisplay: + { + switch( righty->field->type ) + { + case FldNumericBin5: + case FldNumericBinary: + case FldPacked: + case FldNumericDisplay: + { + compare_binary_binary(return_int, lefty, righty); + compared = true; + break; + } + + default: + break; + } + break; + } + + default: + break; + } + if( compared ) + { + break; + } + // We weren't able to compare left/right. Let's see if we understand + // right/left + std::swap(lefty, righty); + ntries += 1; + } + + if( compared && ntries == 2 ) + { + // We have a successful comparision, but we managed it on the second try, + // which means our result has the wrong sign. Fix it: + gg_assign(return_int, gg_negate(return_int)); + } + } + + if( !compared ) + { + // None of our explicit comparisons up above worked, so we revert to the + // general case: + int leftflags = (left_side_ref.all ? REFER_T_MOVE_ALL : 0) + + (left_side_ref.addr_of ? REFER_T_ADDRESS_OF : 0); + int rightflags = (right_side_ref.all ? REFER_T_MOVE_ALL : 0) + + (right_side_ref.addr_of ? REFER_T_ADDRESS_OF : 0); + gg_assign( return_int, gg_call_expr( + INT, + "__gg__compare", + gg_get_address_of(left_side_ref.field->var_decl_node), + refer_offset_source(left_side_ref), + refer_size_source( left_side_ref), + build_int_cst_type(INT, leftflags), + gg_get_address_of(right_side_ref.field->var_decl_node), + refer_offset_source(right_side_ref), + refer_size_source( right_side_ref), + build_int_cst_type(INT, rightflags), + integer_zero_node, + NULL_TREE)); + } + +// gg_printf(" result is %d\n", return_int, NULL_TREE); + } + +static void +move_tree( cbl_field_t *dest, + tree offset, + tree psz_source, + tree length_bump=integer_zero_node) // psz_source is a null-terminated string + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", dest); + SHOW_PARSE_END + } + + bool moved = true; + + tree source_length = gg_define_size_t(); + gg_assign(source_length, gg_strlen(psz_source)); + gg_assign(source_length, gg_add(source_length, gg_cast(SIZE_T, length_bump))); + + tree min_length = gg_define_size_t(); + + tree location = gg_define_uchar_star(); + tree length = gg_define_size_t(); + + gg_assign(location, + gg_add(member(dest->var_decl_node, "data"), + offset)); + gg_assign(length, + member(dest->var_decl_node, "capacity")); + + IF(source_length, lt_op, length) + { + gg_assign(min_length, source_length); + } + ELSE + { + gg_assign(min_length, length); + } + ENDIF + + tree value; + tree rdigits; + + switch( dest->type ) + { + case FldGroup: + case FldAlphanumeric: + // Space out the alphanumeric destination: + gg_memset( location, + build_int_cst_type(INT, internal_space), + length ); + // Copy the alphanumeric result over. + gg_memcpy( location, + psz_source, + min_length ); + break; + + case FldNumericDisplay: + case FldNumericEdited: + case FldNumericBinary: + case FldNumericBin5: + case FldPacked: + case FldIndex: + { + value = gg_define_int128(); + rdigits = gg_define_int(); + + gg_assign(value, + gg_call_expr( INT128, + "__gg__dirty_to_binary_internal", + psz_source, + source_length, + gg_get_address_of(rdigits), + NULL_TREE)); + + gg_call(VOID, + "__gg__int128_to_qualified_field", + gg_get_address_of(dest->var_decl_node), + offset, + build_int_cst_type(SIZE_T, dest->data.capacity), + value, + rdigits, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE); + } + break; + + case FldAlphaEdited: + { + gg_call(VOID, + "__gg__string_to_alpha_edited_ascii", + location, + psz_source, + min_length, + member(dest->var_decl_node, "picture"), + NULL); + break; + } + + default: + moved = false; + break; + } + + TRACE1 + { + TRACE1_HEADER + gg_fprintf(trace_handle, 1, "source: \"%s\"", psz_source); + TRACE1_END + TRACE1_INDENT + TRACE1_FIELD( "dest : ", dest, "") + TRACE1_END + } + + if( !moved ) + { + dbgmsg("###### %10s in %s:%d\n", __func__, __FILE__, __LINE__ ); + cbl_internal_error( "I don't know how to MOVE an alphabetical string to %s(%s) \n", + cbl_field_type_str(dest->type), + dest->name + ); + return; + } + } + +static void +move_tree_to_field(cbl_field_t *field, tree psz) + { + move_tree(field, integer_zero_node, psz); + } + +static tree +get_string_from(cbl_field_t *field) + { + // This returns a malloced copy of either a literal string or a + // an alphanumeric field. The idea is that eventually free() will be + // called in the runtime space: + + tree psz = gg_define_char_star(); + + if( field ) + { + switch( field->type ) + { + case FldLiteralA: + { + gg_assign(psz, + gg_cast(CHAR_P, + gg_malloc(build_int_cst_type(SIZE_T, + field->data.capacity+1)))); + char *litstring = get_literal_string(field); + gg_memcpy(psz, + gg_string_literal(litstring), + build_int_cst_type(SIZE_T, field->data.capacity+1)); + break; + } + + case FldGroup: + case FldAlphanumeric: + // make a copy of .data: + gg_assign(psz, + gg_cast(CHAR_P, + gg_malloc(build_int_cst_type(SIZE_T, + field->data.capacity+1)))); + gg_memcpy( psz, + member(field, "data"), + member(field, "capacity")); + // null-terminate it: + gg_assign( gg_array_value(psz, member(field, "capacity")), + char_nodes[0]); + break; + + case FldForward: + { + // At the present time, we are assuming this happens when somebody + // specifies an unquoted file name in an ASSIGN statement: + // SELECT file3 ASSIGN DISK. + // + // In that case, we just return DISK, which is field->name: + psz = gg_strdup(gg_string_literal(field->name)); + break; + } + + default: + cbl_internal_error( + "%s(): field->type %s must be literal or alphanumeric", + __func__, cbl_field_type_str(field->type)); + break; + } + } + else + { + gg_assign(psz, gg_cast(CHAR_P, null_pointer_node)); + } + return psz; + } + +static char * +combined_name(cbl_label_t *label) + { + // This routine returns a pointer to a static, so make sure you use the result + // before calling the routine again + char *para_name = nullptr; + char *sect_name = nullptr; + const char *program_name = current_function->our_unmangled_name; + + if( label->type == LblParagraph ) + { + para_name = label->name; + + if( label->parent ) + { + // It's possible for implicit + cbl_label_t *section_label = cbl_label_of(symbol_at(label->parent)); + sect_name = section_label->name; + } + } + else + { + sect_name = label->name; + } + + static size_t retval_size = 256; + static char *retval= (char *)xmalloc(retval_size); + + char *paragraph = cobol_name_mangler(para_name); + char *section = cobol_name_mangler(sect_name); + char *mangled_program_name = cobol_name_mangler(program_name); + + while( retval_size < (paragraph ? strlen(paragraph) : 0 ) + + (section ? strlen(section) : 0 ) + + (mangled_program_name ? strlen(mangled_program_name) : 0 ) + + 24 ) + { + retval_size *= 2; + retval = (char *)xrealloc(retval, retval_size); + } + + *retval = '\0'; + char ach[24]; + if( paragraph ) + { + strcat(retval, paragraph); + } + strcat(retval, "."); + if( section ) + { + strcat(retval, section); + } + strcat(retval, "."); + if( mangled_program_name ) + { + strcat(retval, mangled_program_name); + } + sprintf(ach, ".%ld", current_function->program_id_number); + strcat(retval, ach); + sprintf(ach, ".%ld", symbol_label_id(label)); + strcat(retval, ach); + free(mangled_program_name); + free(section); + free(paragraph); + + return retval; + } + +// We implement SECTION and PARAGRAPH stuff before the rest of program +// structure, because we have some static routines in here that are called +// by enter_ and leave_ program, and so on. + +static void +assembler_label(const char *label) + { + // label has to be a valid label for the assembler + static size_t length = 0; + static char *build = nullptr; + + const char local_text[] = ":"; + if( length < strlen(label) + strlen(local_text) + 1 ) + { + length = strlen(label) + strlen(local_text) + 1; + free(build); + build = (char *)xmalloc(length); + } + + strcpy(build, label); + strcat(build, local_text); + + gg_insert_into_assembler(build); + } + +static void +section_label(struct cbl_proc_t *procedure) + { + // With nested programs, you can have multiple program/section pairs with the + // the same names; we use a deconflictor to avoid collisions + + gg_set_current_line_number(CURRENT_LINE_NUMBER); + + size_t deconflictor = symbol_label_id(procedure->label); + + cbl_label_t *label = procedure->label; + // The _initialize_program section isn't relevant. + static size_t psz_length = 256; + static char *psz = (char *)xmalloc(psz_length); + sprintf(psz, + "# SECTION %s in %s (%ld)", + label->name, + current_function->our_unmangled_name, + deconflictor); + gg_insert_into_assembler(psz); + + // The label has to start with an underscore. I tried a period, but those + // don't seem to show up in GDB's internal symbol tables. + char *combined = combined_name(procedure->label); + if( psz_length < strlen(combined) + 36 + 1 ) + { + free(psz); + psz_length = strlen(combined) + 36 + 1; + psz = (char *)xmalloc(psz_length); + } + sprintf(psz, + "_sect.%s", + combined_name(procedure->label)); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(psz); + SHOW_PARSE_END + } + assembler_label(psz); + gg_assign(var_decl_nop, build_int_cst_type(INT, 108)); + } + +static void +paragraph_label(struct cbl_proc_t *procedure) + { + // We need to give each paragraph a unique and assembler-compatible name + // that can be found and used by GDB. + // Complications: + // 1) paragraph names can be reused in the same program, provided they + // are in different sections. + // 2) paragraph names can be duplicated in a section, provided that they + // are not referenced by the program. We provide a deconflictor to + // separate such labels. + + gg_set_current_line_number(CURRENT_LINE_NUMBER); + + cbl_label_t *paragraph = procedure->label; + cbl_label_t *section = nullptr; + + if( procedure->label->parent ) + { + section = cbl_label_of(symbol_at(procedure->label->parent)); + } + + char *para_name = paragraph->name; + char *section_name = section ? section->name : nullptr; + + static size_t psz_length = 256; + static char *psz = (char *)xmalloc(psz_length); + + static size_t deconflictor = symbol_label_id(procedure->label); + + sprintf(psz, + "# PARAGRAPH %s of %s in %s (%ld)", + para_name, + section_name, + current_function->our_unmangled_name, + deconflictor); + gg_insert_into_assembler(psz); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(psz); + SHOW_PARSE_END + } + + // The label has to start with an underscore. I tried a period, but those + // don't seem to show up in GDB's internal symbol tables. + char *combined = combined_name(procedure->label); + if( psz_length < strlen(combined) + 36 + 1 ) + { + free(psz); + psz_length = strlen(combined) + 36 + 1; + psz = (char *)xmalloc(psz_length); + } + + sprintf(psz, + "_para.%s", + combined_name(procedure->label)); + assembler_label(psz); + gg_assign(var_decl_nop, build_int_cst_type(INT, 109)); + } + +static void +pseudo_return_push(cbl_proc_t *procedure, tree return_addr) + { + // Put the return address onto the stack: + //gg_suppress_location(true); + + TRACE1 + { + TRACE1_HEADER + gg_printf("%s %p %p", + gg_string_literal(procedure->label->name), + gg_cast(SIZE_T, procedure->exit.addr), + return_addr, + NULL_TREE); + TRACE1_END + } + + gg_call(VOID, + "__gg__pseudo_return_push", + procedure->exit.addr, + return_addr, + NULL_TREE); + + //gg_suppress_location(false); + } + +static void +pseudo_return_pop(cbl_proc_t *procedure) + { + //gg_suppress_location(true); + + TRACE1 + { + TRACE1_HEADER + gg_printf("%s comparing proc_exit %p to global_exit %p -- ", + gg_string_literal(procedure->label->name), + gg_cast(SIZE_T, procedure->exit.addr), + var_decl_exit_address, + NULL_TREE); + } + + IF( var_decl_exit_address, eq_op, procedure->exit.addr ) + { + TRACE1 + { + TRACE1_TEXT("Returning") + } + // The top of the stack is us! + + // Pick up the return address from the pseudo_return stack: + gg_assign(current_function->void_star_temp, + gg_call_expr( VOID_P, + "__gg__pseudo_return_pop", + NULL_TREE)); + // And do the return: + gg_goto(current_function->void_star_temp); + } + ELSE + { + TRACE1 + { + TRACE1_TEXT("No match") + } + ENDIF + } + TRACE1 + { + TRACE1_END + } + //gg_suppress_location(false); + } + +static void +leave_procedure(struct cbl_proc_t *procedure, bool /*section*/) + { + if(procedure) + { + // fprintf(stderr, "LeavingProcedure: (%p) %s %p %p %p %p %p %p\n", + // procedure, + // procedure->name, + // procedure->top.go_to, + // procedure->top.label, + // procedure->exit.go_to, + // procedure->exit.label, + // procedure->bottom.go_to, + // procedure->bottom.label); + // Procedure can be null, for example at the beginning of a + // new program, or after somebody else has cleared it out. + gg_append_statement(procedure->exit.label); + + char ach[256]; + sprintf(ach, + "_procret.%ld:", + symbol_label_id(procedure->label)); + gg_insert_into_assembler(ach); + pseudo_return_pop(procedure); + gg_append_statement(procedure->bottom.label); + } + } + +static void +leave_section_internal() + { + Analyze(); + SHOW_PARSE + { + if(gg_trans_unit.function_stack.size() && current_function && current_function->current_section) + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(current_function->current_section->label->name) + SHOW_PARSE_END + } + } + + if( current_function->current_section ) + { + // gg_printf( "Leaving section %s\n", + // build_string_literal( strlen(current_function->current_section->label->name)+1, current_function->current_section->label->name), + // NULL_TREE); + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("\"", current_function->current_section->label->name, "\""); + TRACE1_END + } + leave_procedure(current_function->current_section, true); + + current_function->current_section = NULL; + } + else + { + //gg_printf("Somebody is leaving a section twice\n", NULL_TREE); + } + } + +void +parser_leave_section( struct cbl_label_t */*label*/ ) {} + +static void +leave_paragraph_impl() + { + Analyze(); + SHOW_PARSE + { + if(gg_trans_unit.function_stack.size() && current_function && current_function->current_paragraph) + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(current_function->current_paragraph->label->name) + SHOW_PARSE_END + } + } + + if( current_function->current_paragraph ) + { + // gg_printf( "Leaving paragraph %s\n", + // build_string_literal( strlen(current_function->current_paragraph->label->name)+1, current_function->current_paragraph->label->name), + // NULL_TREE); + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("\"", current_function->current_paragraph->label->name, "\""); + TRACE1_END + } + leave_procedure(current_function->current_paragraph, false); + current_function->current_paragraph = NULL; + } + else + { + //gg_printf("Somebody is leaving a paragraph twice\n", NULL_TREE); + } + } + +void parser_leave_paragraph( cbl_label_t * ) {} +static inline void leave_paragraph_internal() { leave_paragraph_impl(); } + +static struct cbl_proc_t * +find_procedure(cbl_label_t *label) + { +// SHOW_PARSE +// { +// SHOW_PARSE_HEADER +// SHOW_PARSE_LABEL(" ", label) +// SHOW_PARSE_TEXT("\n"); +// } + + cbl_proc_t *retval = label->structs.proc; + + // We have to cope with an oddball circumstance. When label->entered is + // greater than zero, it means that a paragraph with this label has been + // entered and left already. This means that a paragraph name has been + // defined more than once. Had it been referenced with a GOTO or PERFORM, + // that would have been a syntax error. + // + // + // In this case, we need to replace the existing cbl_proc_t structure. We + // will be laying down labels for this second (or more) instance of + // parser_enter_paragraph, and we must create different labels. + + if( !retval ) + { + static int counter=1; + char ach[2*sizeof(cbl_name_t)]; + + // This is a new section or paragraph; we need to create its values: + retval = (struct cbl_proc_t *)xmalloc(sizeof(struct cbl_proc_t)); + retval->label = label; + + gg_create_goto_pair(&retval->top.go_to, + &retval->top.label, + &retval->top.addr, + &retval->top.decl); + gg_create_goto_pair(&retval->exit.go_to, + &retval->exit.label, + &retval->exit.addr + ); + gg_create_goto_pair(&retval->bottom.go_to, + &retval->bottom.label, + &retval->bottom.addr + ); + + // fprintf(stderr, "NewProcedure: (%p) %s %p %p %p %p %p %p\n", + // retval, + // retval->name, + // retval->top.go_to, + // retval->top.label, + // retval->exit.go_to, + // retval->exit.label, + // retval->bottom.go_to, + // retval->bottom.label); + + // If this procedure is a paragraph, and it becomes the target of + // an ALTER statement, alter_location will be used to make that change + sprintf(ach, "_%s_alter_loc_%d", label->name, counter); + retval->alter_location = gg_define_void_star(ach, vs_static); + DECL_INITIAL(retval->alter_location) = null_pointer_node; + + counter +=1 ; + + label->structs.proc = retval; + } + + return retval; + } + +void +parser_enter_section(cbl_label_t *label) + { + Analyze(); + // Do the leaving before the SHOW_PARSE; it makes the output more sensible + // A new section ends the current paragraph: + leave_paragraph_internal(); + + // And the current section: + leave_section_internal(); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", label) + SHOW_PARSE_END + } + + CHECK_LABEL(label); + + // This NOP is needed to give GDB a line number for the entry point of + // paragraphs + gg_set_current_line_number(CURRENT_LINE_NUMBER); + gg_assign(var_decl_nop, build_int_cst_type(INT, 101)); + + struct cbl_proc_t *procedure = find_procedure(label); + gg_append_statement(procedure->top.label); + section_label(procedure); + current_function->current_section = procedure; + + TRACE1 + { + TRACE1_HEADER + TRACE1_LABEL("\"", label, "\"") + TRACE1_END + } + } + +void +parser_enter_paragraph(cbl_label_t *label) + { + Analyze(); + // Do the leaving before the SHOW_PARSE; the output makes more sense that way + // A new paragraph ends the current paragraph: + leave_paragraph_internal(); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", label) + SHOW_PARSE_END + } + + CHECK_LABEL(label); + + struct cbl_proc_t *procedure = find_procedure(label); + gg_append_statement(procedure->top.label); + paragraph_label(procedure); + current_function->current_paragraph = procedure; + + TRACE1 + { + TRACE1_HEADER + TRACE1_LABEL("\"", label, "\"") + TRACE1_END + } + } + +void +parser_exit_section(void) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("\"", current_function->current_section->label->name, "\"") + TRACE1_END + } + gg_append_statement(current_function->current_section->exit.go_to); + } + +void +parser_exit_paragraph(void) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("\"", current_function->current_paragraph->label->name, "\"") + TRACE1_END + } + gg_append_statement(current_function->current_paragraph->exit.go_to); + } + +void +parser_exit_perform(struct cbl_perform_tgt_t *tgt, bool cycle) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + if(cycle) + { + gg_append_statement(tgt->addresses.testA.go_to); + } + else + { + gg_append_statement(tgt->addresses.exit.go_to); + } + } + +void +parser_alter( cbl_perform_tgt_t *tgt ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + cbl_label_t *altered = tgt->from(); + cbl_label_t *proceed_to = tgt->to(); + + struct cbl_proc_t *altered_proc = find_procedure(altered); + struct cbl_proc_t *proceed_to_proc = find_procedure(proceed_to); + + gg_assign( altered_proc->alter_location, + proceed_to_proc->top.addr); + } + +void +parser_goto( cbl_refer_t value_ref, size_t narg, cbl_label_t * const labels[] ) + { + // This is part of the Terrible Trio of parser_perform, parser_goto and + // parser_enter_[procedure]. parser_goto has an easier time of it than + // the other two, because it just has to jump from here to the entry point + // of the paragraph [or section] + Analyze(); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + for(size_t i=0; iname); + } + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + for(size_t i=0; iname); + TRACE1_TEXT(" "); + } + TRACE1_END + } + + gcc_assert(narg >= 1); + + // This is a computed GOTO. It might have only one element, which is + // an ordinary GOTO without a DEPENDING ON clause. We create that table + // anyway, because in the case of an ALTER statement, we will be replacing + // that sole element with the PROCEED TO element. + + // We need to create a static array of pointers to locations: + static int comp_gotos = 1; + char ach[32]; + sprintf(ach, "_comp_goto_%d", comp_gotos++); + tree array_of_pointers_type = build_array_type_nelts(VOID_P, narg); + tree array_of_pointers = gg_define_variable(array_of_pointers_type, ach, vs_static); + + // We have the array. Now we need to build the constructor for it + tree constr = make_node(CONSTRUCTOR); + TREE_TYPE(constr) = array_of_pointers_type; + TREE_STATIC(constr) = 1; + TREE_CONSTANT(constr) = 1; + + for(size_t i=0; itop.addr ); + } + DECL_INITIAL(array_of_pointers) = constr; + + // We need to pick up the value argument as an INT: + tree value = gg_define_int(); + + if( value_ref.field ) + { + get_binary_value( value, + NULL, + value_ref.field, + refer_offset_source(value_ref)); + // Convert it from one-based to zero-based: + gg_decrement(value); + // Check to see if the value is in the range 0...narg-1: + IF( value, ge_op, integer_zero_node) + { + IF( value, lt_op, build_int_cst_type(INT, narg) ) + { + // It is in the valid range, so we can do the goto: + Analyzer.ExitMessage(); + gg_goto(gg_array_value(array_of_pointers, value)); + } + ELSE + { + // Otherwise, just fall through + } + ENDIF + } + ELSE + ENDIF + } + else + { + // This is a simple GOTO. Because it is a simple GO TO, there is the + // possibility that this paragraph was the target of an ALTER statement. + IF( current_function->current_paragraph->alter_location, ne_op, null_pointer_node ) + { + // Somebody did an ALTER statement before we got here + gg_assign(current_function->void_star_temp, current_function->current_paragraph->alter_location); + } + ELSE + { + // This paragraph wasn't the target of an ALTER: + gg_assign(current_function->void_star_temp, gg_array_value(array_of_pointers, 0)); + } + ENDIF + Analyzer.ExitMessage(); + gg_goto(current_function->void_star_temp); + } + return; + } + +void +parser_perform(cbl_label_t *label, bool suppress_nexting) + { + label->used = yylineno; + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", label) + char ach[32]; + sprintf(ach, " label is at %p", label); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " label->proc is %p", label->structs.proc); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_LABEL("", label, "") + TRACE1_END + } + + CHECK_LABEL(label); + + struct cbl_proc_t *procedure = find_procedure(label); + + // We need to create the unnamed return address that we + // will instantiate right after the goto: + tree return_address_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + NULL_TREE, + void_type_node); + DECL_CONTEXT(return_address_decl) = current_function->function_decl; + TREE_USED(return_address_decl) = 1; + + tree return_label_expr = build1(LABEL_EXPR, + void_type_node, + return_address_decl); + tree return_addr = gg_get_address_of(return_address_decl); + +// cbl_parser_mod *parser_mod = new cbl_parser_mod; + + // Put the return address onto the pseudo-return stack + pseudo_return_push(procedure, return_addr); + + // Create the code that will launch the paragraph + // The following comment is, believe it or not, necessary. The insertion + // includes a line number insertion that's needed because when the goto/label + // pairs were created, the locations of the goto instruction and the label + // were not known. + + char *para_name = nullptr; + char *sect_name = nullptr; + const char *program_name = current_function->our_unmangled_name; + size_t deconflictor = symbol_label_id(label); + + char ach[256]; + if( label->type == LblParagraph ) + { + cbl_label_t *section_label = cbl_label_of(symbol_at(label->parent)); + para_name = label->name; + sect_name = section_label->name; + sprintf(ach, + "# PERFORM %s of %s of %s (%ld)", + para_name, + sect_name, + program_name, + deconflictor); + + gg_insert_into_assembler(ach); + } + else + { + sect_name = label->name; + sprintf(ach, + "# PERFORM %s of %s (%ld)", + sect_name, + program_name, + deconflictor); + gg_insert_into_assembler(ach); + } + + if( !suppress_nexting ) + { + sprintf(ach, + "_proccall.%ld.%d:", + symbol_label_id(label), + call_counter++); + gg_insert_into_assembler( ach ); + } + + // We do the indirect jump in order to prevent the compiler from complaining + // in the case where we are performing a USE GLOBAL DECLARATIVE. Without the + // indirection, the compiler isn't able to handle the case where we are + // jumping to a location in our parent program-id; it can't find a matching + // local symbol, and crashes. + gg_goto(procedure->top.addr); + + // And create the return address label: + gg_append_statement(return_label_expr); + TRACE1 + { + TRACE1_HEADER + TRACE1_LABEL("back_from_performing ", label, "") + TRACE1_END + } + } + +void +parser_perform_times( cbl_label_t *proc_1, cbl_refer_t count ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", proc_1) + SHOW_PARSE_REF(" ", count) + SHOW_PARSE_TEXT(" TIMES") + char ach[32]; + sprintf(ach, " proc_1 is at %p", proc_1); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " proc_1->proc is %p", proc_1->structs.proc); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + char ach[256]; + size_t our_pseudo_label = pseudo_label++; + sprintf(ach, + "_proccallb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + + tree counter = gg_define_variable(LONG); + + // Get the count: + get_binary_value( counter, + NULL, + count.field, + refer_offset_source(count)); + + // Make sure the initial count is valid: + WHILE( counter, gt_op, gg_cast(LONG, integer_zero_node) ) + { + static const bool suppress_nexting = true; + parser_perform(proc_1, suppress_nexting); + gg_decrement(counter); + } + WEND + + sprintf(ach, + "_procretb.%ld:", + our_pseudo_label); + gg_insert_into_assembler(ach); + } + +static void +internal_perform_through( cbl_label_t *proc_1, + cbl_label_t *proc_2, + bool suppress_nexting ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", proc_1); + char ach[32]; + sprintf(ach, " proc_1 is at %p", proc_1); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " proc_1->proc is %p", proc_1->structs.proc); + SHOW_PARSE_TEXT(ach) + if( proc_2 ) + { + SHOW_PARSE_INDENT + SHOW_PARSE_LABEL("", proc_2); + sprintf(ach, " proc_2 is at %p", proc_2); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " proc_2->proc is %p", proc_2->structs.proc); + SHOW_PARSE_TEXT(ach) + } + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + CHECK_LABEL(proc_1); + + if(!proc_2) + { + parser_perform(proc_1, suppress_nexting); + return; + } + + CHECK_LABEL(proc_2); + + struct cbl_proc_t *proc1 = find_procedure(proc_1); + struct cbl_proc_t *proc2 = find_procedure(proc_2); + + // We need to create the unnamed return address that we + // will instantiate right after the goto: + tree return_address_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + NULL_TREE, + void_type_node); + DECL_CONTEXT(return_address_decl) = current_function->function_decl; + TREE_USED(return_address_decl) = 1; + + tree return_label_expr = build1(LABEL_EXPR, + void_type_node, + return_address_decl); + tree return_addr = gg_get_address_of(return_address_decl); + + //cbl_parser_mod *parser_mod_proc1 = new cbl_parser_mod; + //cbl_parser_mod *parser_mod_proc2 = new cbl_parser_mod; + + // Put the return address of the second procedure onto the stack: + pseudo_return_push(proc2, return_addr); + + // Create the code that will launch the first procedure + gg_insert_into_assembler("# PERFORM %s THROUGH %s", + proc_1->name, proc_2->name); + + if( !suppress_nexting ) + { + char ach[256]; + sprintf(ach, + "_proccall.%ld.%d:", + symbol_label_id(proc_2), + call_counter++); + gg_insert_into_assembler(ach); + } + + gg_append_statement(proc1->top.go_to); + + // And create the return address label: + gg_append_statement(return_label_expr); + } + +static void +internal_perform_through_times( cbl_label_t *proc_1, + cbl_label_t *proc_2, + cbl_refer_t &count) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", proc_1); + char ach[32]; + sprintf(ach, " proc_1 is at %p", proc_1); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " proc_1->proc is %p", proc_1->structs.proc); + SHOW_PARSE_TEXT(ach) + if( proc_2 ) + { + SHOW_PARSE_INDENT + SHOW_PARSE_LABEL("", proc_2); + sprintf(ach, " proc_2 is at %p", proc_2); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " proc_2->proc is %p", proc_2->structs.proc); + SHOW_PARSE_TEXT(ach) + } + SHOW_PARSE_REF(" ", count); + SHOW_PARSE_TEXT(" TIMES"); + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + size_t our_pseudo_label = pseudo_label++; + + char ach[256]; + sprintf(ach, + "_proccallb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + + tree counter = gg_define_variable(LONG); + get_binary_value( counter, + NULL, + count.field, + refer_offset_source(count)); + WHILE( counter, gt_op, gg_cast(LONG, integer_zero_node) ) + { + internal_perform_through(proc_1, proc_2, true); // true means suppress_nexting + gg_decrement(counter); + } + WEND + + sprintf(ach, + "_procretb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + } + +void +register_main_switch(const char *main_string) + { + char *mstr = xstrdup(main_string); + char *p = strchr(mstr, ':'); + if( p ) + { + *p = '\0'; + main_string = p+1; + main_strings[mstr] = main_string; + } + else + { + main_strings[mstr] = ""; + } + free(mstr); + } + +static int file_level = 0; + +void +parser_first_statement( int lineno ) + { + // In the event that this routine is the one that main() calls to get the + // execution ball rolling, we want the GDB "start" function to be able + // to set a temporary breakpoint at this location. We get that rolling + // here. + + char ach[256]; + + SHOW_PARSE + { + SHOW_PARSE_HEADER + sprintf(ach, " lineno is %d, suppression is %d", lineno, suppress_cobol_entry_point); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + + if( strcmp(current_function->our_name, ach_cobol_entry_point) == 0 + && !suppress_cobol_entry_point ) + { + sprintf(ach, + "%s:%d", + current_filename.back().c_str(), + lineno); + *ach_cobol_entry_point = '\0'; + create_cblc_string_variable("_cobol_entry_point", ach); + + SHOW_PARSE + { + SHOW_PARSE_INDENT + char ach2[512]; + sprintf(ach2, "setting _cobol_entry_point to \"%s\"", ach); + SHOW_PARSE_TEXT(ach2) + SHOW_PARSE_END + } + } + + if( !suppress_cobol_entry_point ) + { + char achentry[128]; + sprintf(ach, + "%s:%d", + current_filename.back().c_str(), + lineno); + + sprintf(achentry, "_prog_entry_point_%s", current_function->our_name); + create_cblc_string_variable(achentry, ach); + } + } + +#define linemap_add(...) + +void +parser_enter_file(const char *filename) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + char ach[32]; + sprintf(ach, " entering level:%d %s", file_level+1, filename); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + current_filename.push_back(filename); + + std::unordered_map::const_iterator it + = main_strings.find(filename); + + if( it != main_strings.end() ) + { + // There was a -main switch for this file. + this_module_has_main = true; + next_program_is_main = true; + + const char *pname = it->second.c_str(); + if( pname && strlen(pname) ) + { + main_entry_point = xstrdup(pname); + } + } + + // Let the linemap routine know we are working on a new file: + linemap_add(line_table, LC_ENTER, 0, filename, 1); + + if( file_level == 0 ) + { + // Build a translation_unit_decl: + gg_build_translation_unit(filename); + create_our_type_nodes(); + } + + file_level += 1; + + if( file_level == 1 ) + { + // This table is used for "creating" the file-static named variables used in + // the GENERIC we generate. + + // Establish our variable declarations for global variables in libgcobol: + +#define SET_VAR_DECL(A, B, C) \ + A = gg_declare_variable(B, C, NULL_TREE, vs_external_reference) + + SET_VAR_DECL(var_decl_exception_code , INT , "__gg__exception_code"); + SET_VAR_DECL(var_decl_exception_handled , INT , "__gg__exception_handled"); + SET_VAR_DECL(var_decl_exception_file_number , INT , "__gg__exception_file_number"); + SET_VAR_DECL(var_decl_exception_file_status , INT , "__gg__exception_file_status"); + SET_VAR_DECL(var_decl_exception_file_name , CHAR_P , "__gg__exception_file_name"); + SET_VAR_DECL(var_decl_exception_statement , CHAR_P , "__gg__exception_statement"); + SET_VAR_DECL(var_decl_exception_source_file , CHAR_P , "__gg__exception_source_file"); + SET_VAR_DECL(var_decl_exception_line_number , INT , "__gg__exception_line_number"); + SET_VAR_DECL(var_decl_exception_program_id , CHAR_P , "__gg__exception_program_id"); + SET_VAR_DECL(var_decl_exception_section , CHAR_P , "__gg__exception_section"); + SET_VAR_DECL(var_decl_exception_paragraph , CHAR_P , "__gg__exception_paragraph"); + + SET_VAR_DECL(var_decl_default_compute_error , INT , "__gg__default_compute_error"); + SET_VAR_DECL(var_decl_rdigits , INT , "__gg__rdigits"); + SET_VAR_DECL(var_decl_odo_violation , INT , "__gg__odo_violation"); + SET_VAR_DECL(var_decl_unique_prog_id , SIZE_T , "__gg__unique_prog_id"); + + SET_VAR_DECL(var_decl_entry_location , VOID_P , "__gg__entry_pointer"); + SET_VAR_DECL(var_decl_exit_address , VOID_P , "__gg__exit_address"); + + SET_VAR_DECL(var_decl_call_parameter_signature , CHAR_P , "__gg__call_parameter_signature"); + SET_VAR_DECL(var_decl_call_parameter_count , INT , "__gg__call_parameter_count"); + SET_VAR_DECL(var_decl_call_parameter_lengths , build_array_type(SIZE_T, NULL), + "__gg__call_parameter_lengths"); + SET_VAR_DECL(var_decl_return_code , SHORT , "__gg__data_return_code"); + + SET_VAR_DECL(var_decl_arithmetic_rounds_size , SIZE_T , "__gg__arithmetic_rounds_size"); + SET_VAR_DECL(var_decl_arithmetic_rounds , INT_P , "__gg__arithmetic_rounds"); + SET_VAR_DECL(var_decl_fourplet_flags_size , SIZE_T , "__gg__fourplet_flags_size"); + SET_VAR_DECL(var_decl_fourplet_flags , INT_P , "__gg__fourplet_flags"); + + SET_VAR_DECL(var_decl_treeplet_1f , cblc_field_pp_type_node , "__gg__treeplet_1f" ); + SET_VAR_DECL(var_decl_treeplet_1o , SIZE_T_P , "__gg__treeplet_1o" ); + SET_VAR_DECL(var_decl_treeplet_1s , SIZE_T_P , "__gg__treeplet_1s" ); + SET_VAR_DECL(var_decl_treeplet_2f , cblc_field_pp_type_node , "__gg__treeplet_2f" ); + SET_VAR_DECL(var_decl_treeplet_2o , SIZE_T_P , "__gg__treeplet_2o" ); + SET_VAR_DECL(var_decl_treeplet_2s , SIZE_T_P , "__gg__treeplet_2s" ); + SET_VAR_DECL(var_decl_treeplet_3f , cblc_field_pp_type_node , "__gg__treeplet_3f" ); + SET_VAR_DECL(var_decl_treeplet_3o , SIZE_T_P , "__gg__treeplet_3o" ); + SET_VAR_DECL(var_decl_treeplet_3s , SIZE_T_P , "__gg__treeplet_3s" ); + SET_VAR_DECL(var_decl_treeplet_4f , cblc_field_pp_type_node , "__gg__treeplet_4f" ); + SET_VAR_DECL(var_decl_treeplet_4o , SIZE_T_P , "__gg__treeplet_4o" ); + SET_VAR_DECL(var_decl_treeplet_4s , SIZE_T_P , "__gg__treeplet_4s" ); + SET_VAR_DECL(var_decl_nop , INT , "__gg__nop" ); + SET_VAR_DECL(var_decl_main_called , INT , "__gg__main_called" ); + } + } + +void +parser_leave_file() + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + char ach[256]; + sprintf(ach, "leaving level:%d %s", file_level, current_filename.back().c_str()); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + if( file_level > 0) + { + linemap_add(line_table, LC_LEAVE, false, NULL, 0); + } + file_level -= 1; + current_filename.pop_back(); + } + +void +enter_program_common(const char *funcname, const char *funcname_) + { + // We arrive here when processing a PROGRAM-ID. + + // At this point, we don't know how many formal parameters there are going + // to be. + + // We are going to create a function returning a 64-bit value, but it'll + // have no parameters. We'll chain the parameters on in parser_division(), + // when we process PROCEDURE DIVISION USING... + + gg_define_function_with_no_parameters( COBOL_FUNCTION_RETURN_TYPE, + funcname, + funcname_); + + current_function->first_time_through = + gg_define_variable(INT, + "_first_time_through", + vs_static, + integer_one_node); + + gg_create_goto_pair(¤t_function->skip_init_goto, + ¤t_function->skip_init_label); + + IF( current_function->first_time_through, eq_op, integer_zero_node ) + gg_append_statement(current_function->skip_init_goto); + ELSE + ENDIF + + gg_assign(current_function->first_time_through, integer_zero_node); + + // Establish variables that are function-wide in scope: + current_function->void_star_temp = gg_define_void_star("_void_star_temp"); + + current_function->perform_exit_address + = gg_define_void_star("_perform_exit_address"); + + // Make sure the following are null, because when we create the unnamed + // default section, parser_enter_section will attempt to close them out. And + // it's possible on the first go-through that they have garbage values. + + current_function->current_section = NULL; + current_function->current_paragraph = NULL; + + current_function->is_truly_nested = false; + + // Text conversion must be initialized before the code generated by + // parser_symbol_add runs. + + // The text_conversion_override exists both in the library and in the compiler + + __gg__set_internal_codeset(internal_codeset_is_ebcdic()); + gg_call(VOID, + "__gg__set_internal_codeset", + internal_codeset_is_ebcdic() + ? integer_one_node : integer_zero_node, + NULL_TREE); + + __gg__text_conversion_override(td_default_e, cs_default_e); + gg_call(VOID, + "__gg__text_conversion_override", + build_int_cst_type(INT, td_default_e), + build_int_cst_type(INT, cs_default_e), + NULL_TREE); + + gg_call(VOID, + "__gg__codeset_figurative_constants", + NULL_TREE); + + static int counter=1; + char ach[32]; + + sprintf(ach, "_cf_fds_%d", counter); + current_function->first_declarative_section + = gg_define_variable(CHAR_P, + ach, + vs_static, + null_pointer_node); + sprintf(ach, "_cf_cbmc_%d", counter); + current_function->called_by_main_counter = gg_define_variable(INT, + ach, + vs_static, + integer_zero_node); + counter += 1; + + // Initialize the TRACE logic, which has to be done before the first TRACE1 + // invocation, but after there is a function to lay down GIMPLE code in. + + // That is to say: Here. Multiple invocations of trace1_init are harmless. + trace1_init(); + } + +/* Creates a function for program-id 'funcname_'. Returns 1 when funcname_ + is "main" and the -main compiler switch is active for this moudle */ + +void +parser_enter_program( const char *funcname_, + bool is_function, // True for user-defined-function + int *pretval) + { + *pretval = 0; + + // The first thing we have to do is mangle this name. This is safe even + // though the end result will be mangled again, because the mangler doesn't + // change a mangled name. + char *mangled_name = cobol_name_mangler(funcname_); + + size_t parent_index = current_program_index(); + char funcname[128]; + if( parent_index ) + { + // This is a nested function. Tack on the parent_index to the end of it. + sprintf(funcname, "%s.%ld", mangled_name, parent_index); + } + else + { + // This is a top-level function; just use the straight mangled name + strcpy(funcname, mangled_name); + } + free(mangled_name); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(funcname) + SHOW_PARSE_END + } + + if( !is_function && !parent_index ) + { + // This is a top_level program, and not a function + if( next_program_is_main ) + { + next_program_is_main = false; + if(main_entry_point) + { + build_main_that_calls_something(main_entry_point); + free(main_entry_point); + main_entry_point = NULL; + } + else + { + build_main_that_calls_something(funcname); + } + } + } + + // Call this after build_main_that_calls_something, because it manipulates + // the current line number to DEFAULT_LINE_NUMBER. We have to manipulate it + // back afterward. + gg_set_current_line_number(CURRENT_LINE_NUMBER); + + if( strcmp(funcname_, "main") == 0 && this_module_has_main ) + { + // setting 'retval' to 1 let's the caller know that we are being told + // both to synthesize a main() entry point to duplicate GCC's default + // behavior, and to create an explicit entry point named "main". This will + // eventually result in a link error (because of the duplicated entry + // points. The return value serves as an alert; it's up to the caller to + // decide what to do. + *pretval = 1; + } + + if( strcmp(funcname, "dubner") == 0) + { + // This should be enabled by an environment variable. + // But for now I am being cutesy + hijack_for_development(funcname); + return; + } + + enter_program_common(funcname, funcname_); + current_function->is_function = is_function; + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("entered program \"") + TRACE1_TEXT(funcname) + TRACE1_TEXT("\"") + TRACE1_END + } + } + +void +parser_end_program(const char *prog_name ) + { + if( gg_trans_unit.function_stack.size() ) + { + // The body has been created by various parser calls. It's time + // to wrap this sucker up! + + // Ending the program ends the current paragraph and section: + leave_paragraph_internal(); + leave_section_internal(); + } + + SHOW_PARSE + { + SHOW_PARSE_HEADER + TRACE1_TEXT_ABC("\"", prog_name, "\"") + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("\"", prog_name, "\"") + TRACE1_END + } + + if( gg_trans_unit.function_stack.size() ) + { + // The body has been created by various parser calls. It's time + // to wrap this sucker up! + + // Put in a harmless return in case there was no EXIT PROGRAM statement. + // It's harmless because if it isn't needed, a return was already + // executed, and this generated code will never be executed + parser_exit( cbl_refer_t() ); + + // Tell the GCC compiler to do the GIMPLIFY thing. + gg_finalize_function(); + } + } + +static void +remove_p_from_picture(char *picture) + { + // At this point, attr has the scaled_e flag, and rdigits tells us + // which way to scale. So, the P characters in picture are now + // a liability. + + char *rabbit = picture; + char *fox = picture; + + for(;;) + { + char ch = *rabbit++; + if( ch == '\0' ) + { + break; + } + if( ch == 'P' || ch == 'p' ) + { + if( *rabbit == '(' ) + { + while( *rabbit != ')' ) + { + rabbit += 1; + } + rabbit += 1; + // rabbit now points to one past the closing parenthesis + } + size_t to_move = strlen(rabbit); + memmove(fox, rabbit, to_move+1); // +1 snags the '\0' + rabbit = fox; + } + else + { + fox += 1; + } + } + } + +static tree vti_array; +static tree vti_constructor; +static int vti_list_size; +static int vti_next_variable; + +void +parser_init_list_size(int count_of_variables) + { + if( mode_syntax_only() ) return; + + vti_list_size = count_of_variables; + char ach[48]; + sprintf(ach, + "..variables_to_init_%ld", + current_function->our_symbol_table_index); + tree array_of_variables_type = build_array_type_nelts(VOID_P, + count_of_variables+1); + vti_array = gg_define_variable( array_of_variables_type, + ach, + vs_file_static); + vti_constructor = make_node(CONSTRUCTOR); + TREE_TYPE(vti_constructor) = array_of_variables_type; + TREE_STATIC(vti_constructor) = 1; + TREE_CONSTANT(vti_constructor) = 1; + vti_next_variable = 0; + } + +void +parser_init_list_element(cbl_field_t *field) + { + if( mode_syntax_only() ) return; + + gcc_assert(vti_next_variable < vti_list_size); + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(vti_constructor), + build_int_cst_type(SIZE_T, vti_next_variable++), + gg_get_address_of(field->var_decl_node) ); + if( vti_next_variable == vti_list_size) + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(vti_constructor), + build_int_cst_type(SIZE_T, vti_next_variable++), + null_pointer_node ); + DECL_INITIAL(vti_array) = vti_constructor; + } + } + +void +parser_init_list() + { + if( mode_syntax_only() ) return; + + char ach[48]; + sprintf(ach, + "..variables_to_init_%ld", + current_function->our_symbol_table_index); + tree array = gg_trans_unit_var_decl(ach); + gg_call(VOID, + "__gg__variables_to_init", + gg_get_address_of(array), + wsclear() ? gg_string_literal(wsclear()) : null_pointer_node, + NULL_TREE); + } + +static void +psa_FldLiteralN(struct cbl_field_t *field ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", field) + SHOW_PARSE_END + } + // We are constructing a completely static constant structure, based on the + // text string in .initial + + __int128 value = 0; + + do + { + // This is a false do{}while, to isolate the variables: + + // We need to convert data.initial to an __int128 value + char *p = const_cast(field->data.initial); + int sign = 1; + if( *p == '-' ) + { + field->attr |= signable_e; + sign = -1; + p += 1; + } + else if( *p == '+' ) + { + // We set it signable so that the instruction DISPLAY +1 + // actually outputs "+1" + field->attr |= signable_e; + p += 1; + } + + // We need to be able to handle + // 123 + // 123.456 + // 123E + // 123.456E + // where can be N, +N and -N + // + // Oh, yeah, and we're talking handling up to 32 digits, or more, so using + // library routines is off the table. + + int digits = 0; + int rdigits = 0; + int rdigit_delta = 0; + int exponent = 0; + + char *exp = strchr(p, 'E'); + if( !exp ) + { + exp = strchr(p, 'e'); + } + if(exp) + { + exponent = atoi(exp+1); + } + + // We can now calculate the value, and the number of digits and rdigits. + + // We count up leading zeroes as part of the attr->digits calculation. + // It turns out that certain comparisons need to know the number of digits, + // because "IF "2" EQUAL 002" is false, while "IF "2" EQUAL 2" is true. So, + // we need to count up leading zeroes. + + for(;;) + { + char ch = *p++; + if( ch == symbol_decimal_point() ) + { + rdigit_delta = 1; + continue; + } + if( ch < '0' || ch > '9' ) + { + break; + } + digits += 1; + rdigits += rdigit_delta; + value *= 10; + value += ch - '0'; + } + + if( exponent < 0 ) + { + rdigits += -exponent; + } + else + { + while(exponent--) + { + if(rdigits) + { + rdigits -= 1; + } + else + { + digits += 1; + value *= 10; + } + } + } + + if(digits < rdigits) + { + digits = rdigits; + } + field->data.digits = digits; + field->data.rdigits = rdigits; + + // We now need to calculate the capacity. + + unsigned char *pvalue = (unsigned char *)&value; + int capacity; + if( *(uint64_t*)(pvalue + 8) ) + { + // Bytes 15 through 8 are non-zero + capacity = 16; + } + else if( *(uint32_t*)(pvalue + 4) ) + { + // Bytes 7 through 4 are non-zero + capacity = 8; + } + else if( *(uint16_t*)(pvalue + 2) ) + { + // Bytes 3 and 2 + capacity = 4; + } + else if( pvalue[1] ) + { + // Byte 1 is non-zero + capacity = 2; + } + else + { + // The value is zero through 0xFF + capacity = 1; + } + + value *= sign; + + // One last adjustment. The number is signable, so the binary value + // is going to be treated as twos complement. That means that the highest + // bit has to be 1 for negative signable numbers, and 0 for positive. If + // necessary, adjust capacity up by one byte so that the variable fits: + + if( capacity < 16 && (field->attr & signable_e) ) + { + if( value < 0 && (((pvalue[capacity-1] & 0x80) == 0 ))) + { + capacity *= 2; + } + else if( value >= 0 && (((pvalue[capacity-1] & 0x80) == 0x80 ))) + { + capacity *= 2; + } + } + field->data.capacity = capacity; + + }while(0); + + char base_name[257]; + char id_string[32] = ""; + + static size_t our_index = 0; + + sprintf(id_string, ".%ld", ++our_index); + strcpy(base_name, field->name); + strcat(base_name, id_string); + + tree var_type; + + if( field->data.capacity == 16 ) + { + /* GCC-13 has no provision for an int128 constructor. So, we use a + union for our necessary __int128. + + typedef union cblc_int128_t + { + unsigned char array16[16]; + __uint128 uval128; + __int128 sval128; + } cblc_int128_t; + + We build a constructor for the array16[], and then we use that + constructor in the constructor for the union. + */ + + // Build the constructor for array16 + tree array16_type = build_array_type_nelts(UCHAR, 16); + tree array_16_constructor = make_node(CONSTRUCTOR); + TREE_TYPE(array_16_constructor) = array16_type; + TREE_STATIC(array_16_constructor) = 1; + TREE_CONSTANT(array_16_constructor) = 1; + + for(int i=0; i<16; i++) + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(array_16_constructor), + build_int_cst_type(INT, i), + build_int_cst_type(UCHAR, + ((unsigned char *)&value)[i])); + } + + // The array16 constructor is ready to be used + + // So, we need a constructor for the union: + // Now we create the union: + var_type = cblc_int128_type_node; + + tree union_constructor = make_node(CONSTRUCTOR); + TREE_TYPE(union_constructor) = var_type; + TREE_STATIC(union_constructor) = 1; + TREE_CONSTANT(union_constructor) = 1; + + // point next_field to the first field of the union, and + // set the value to be the table constructor + tree next_field = TYPE_FIELDS(var_type); + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(union_constructor), + next_field, + array_16_constructor ); + + tree new_var_decl = gg_define_variable( var_type, + base_name, + vs_static); + DECL_INITIAL(new_var_decl) = union_constructor; + + field->data_decl_node = member(new_var_decl, "sval128"); + TREE_READONLY(field->data_decl_node) = 1; + TREE_CONSTANT(field->data_decl_node) = 1; + + // Convert the compile-time data.value to a run-time variable decl node: + sprintf(id_string, ".%ld", ++our_index); + strcpy(base_name, field->name); + strcat(base_name, id_string); + field->literal_decl_node = gg_define_variable(DOUBLE, id_string, vs_static); + TREE_READONLY(field->literal_decl_node) = 1; + TREE_CONSTANT(field->literal_decl_node) = 1; + char ach[128]; + strfromf128(ach, sizeof(ach), "%.36E", field->data.value); + REAL_VALUE_TYPE real; + real_from_string(&real, ach); + tree initer = build_real (DOUBLE, real); + DECL_INITIAL(field->literal_decl_node) = initer; + + } + else + { + // The value is 1, 2, 4, or 8 bytes, so an ordinary constructor can be used. + var_type = tree_type_from_size( field->data.capacity, + field->attr & signable_e); + tree new_var_decl = gg_define_variable( var_type, + base_name, + vs_static); + DECL_INITIAL(new_var_decl) = build_int_cst_type(var_type, value); + field->data_decl_node = new_var_decl; + } + } + +static void +psa_FldBlob(struct cbl_field_t *var ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", var) + SHOW_PARSE_END + } + + // We are constructing a completely static constant structure. We know the + // capacity. We'll create it from the data.initial. The var_decl_node will + // be a pointer to the data + + char base_name[257]; + char id_string[32] = ""; + + static size_t our_index = 0; + + sprintf(id_string, ".%ld", ++our_index); + strcpy(base_name, var->name); + strcat(base_name, id_string); + + // Build the constructor for the array of bytes + + tree array_type = build_array_type_nelts(UCHAR, var->data.capacity); + tree array_constructor = make_node(CONSTRUCTOR); + TREE_TYPE(array_constructor) = array_type; + TREE_STATIC(array_constructor) = 1; + TREE_CONSTANT(array_constructor) = 1; + + for(size_t i=0; idata.capacity; i++) + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(array_constructor), + build_int_cst_type(INT, i), + build_int_cst_type(UCHAR, var->data.initial[i])); + } + + // The array constructor is ready to be used + tree var_decl_node = gg_define_variable( array_type, + base_name, + vs_static); + DECL_INITIAL(var_decl_node) = array_constructor; + var->var_decl_node = gg_get_address_of(var_decl_node); + } + +void +parser_accept( struct cbl_refer_t refer, + enum special_name_t special_e ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_REF(" ", refer); + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + /* + enum special_name_t + { + SYSIN_e, + SYSIPT_e, + SYSOUT_e, + SYSLIST_e, + SYSLST_e, + SYSPUNCH_e, + SYSPCH_e, + CONSOLE_e, + C01_e, C02_e, C03_e, C04_e, C05_e, C06_e, + C07_e, C08_e, C09_e, C10_e, C11_e, C12_e, + CSP_e, + S01_e, S02_e, S03_e, S04_e, S05_e, + AFP_5A_e, + }; + */ + + // The ISO spec describes the valid special names for ACCEPT as implementation + // dependent. We are following IBM's lead. + + tree environment = build_int_cst_type(INT, special_e); + + switch( special_e ) + { + case CONSOLE_e: + case SYSIPT_e: + case SYSIN_e: + break; + default: + dbgmsg("%s(): We don't know what to do with special_name_t %d,", __func__, special_e); + dbgmsg("%s(): so we are ignoring it.", __func__); + yywarn("unrecognized SPECIAL NAME ignored"); + return; + break; + } + + gg_call(VOID, + "__gg__accept", + environment, + gg_get_address_of(refer.field->var_decl_node), + refer_offset_dest(refer), + refer_size_dest(refer), + NULL_TREE); + } + +// TODO: update documentation. +void +parser_accept_exception( cbl_label_t *accept_label ) + { + // We can't use Analyze() on this one, because the exit ends up being laid + // down before the enter when the goto logic gets untangled by the compiler. + + // We are entering either SIZE ERROR or NOT SIZE ERROR code + RETURN_IF_PARSE_ONLY; + set_up_on_exception_label(accept_label); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" Laying down GOTO OVER") + SHOW_PARSE_LABEL(" ", accept_label) + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down LABEL INTO:") + SHOW_PARSE_LABEL(" ", accept_label) + SHOW_PARSE_END + } + + // Jump over the [NOT] ON EXCEPTION code that is about to be laid down + gg_append_statement( accept_label->structs.arith_error->over.go_to ); + // Create the label that allows the following code to be executed at + // when an ERROR, or NOT ERROR, has been determined to have taken place: + gg_append_statement( accept_label->structs.arith_error->into.label ); + } + +void +parser_accept_exception_end( cbl_label_t *accept_label ) + { + // We can't use Analyze() on this one, because the exit ends up being laid + // down before the enter when the goto logic gets untangled by the compiler. + + // We have reached the end of the ERROR, or NOT ERROR, code. + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" Laying down GOTO BOTTOM") + SHOW_PARSE_LABEL(" ", accept_label) + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down LABEL OVER:") + SHOW_PARSE_LABEL(" ", accept_label) + SHOW_PARSE_END + } + + // Jump to the end of the arithmetic code: + gg_append_statement( accept_label->structs.arith_error->bottom.go_to ); + // Lay down the label that allows the ERROR/NOT ERROR instructions + // to exist in a lacuna that doesn't get executed unless somebody jumps + // to it: + gg_append_statement( accept_label->structs.arith_error->over.label ); + } + +void +parser_accept_command_line( cbl_refer_t tgt, + cbl_refer_t source, + cbl_label_t *error, + cbl_label_t *not_error ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( error ) + { + SHOW_PARSE_LABEL(" error ", error) + } + if( not_error ) + { + SHOW_PARSE_LABEL(" not_error ", not_error) + } + SHOW_PARSE_END + } + + static tree erf = gg_define_variable(INT, "..pac_erf", vs_file_static); + + if( !source.field ) + { + // The whole command-line is wanted + gg_assign(erf, + gg_call_expr( INT, + "__gg__get_command_line", + gg_get_address_of(tgt.field->var_decl_node), + refer_offset_dest(tgt), + refer_size_dest(tgt), + NULL_TREE)); + if( error ) + { + // There is an ON EXCEPTION phrase: + IF( erf, ne_op, integer_zero_node ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down GOTO error->INTO for_command_line") + SHOW_PARSE_LABEL(" ", error) + } + gg_append_statement( error->structs.arith_error->into.go_to ); + } + ELSE + { + } + ENDIF + } + if( not_error ) + { + // There is an NOT ON EXCEPTION phrase: + IF( erf, eq_op, integer_zero_node ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down GOTO not_error->INTO for command_line") + SHOW_PARSE_LABEL(" ", not_error) + } + gg_append_statement( not_error->structs.arith_error->into.go_to ); + } + ELSE + { + } + ENDIF + } + } + else + { + // A particular parameter has been requested: + gg_assign(erf, + gg_call_expr( INT, + "__gg__get_argv", + gg_get_address_of(tgt.field->var_decl_node), + refer_offset_dest(tgt), + refer_size_dest(tgt), + gg_get_address_of(source.field->var_decl_node), + refer_offset_dest(source), + refer_size_dest(source), + NULL_TREE)); + if( error ) + { + // There is an ON EXCEPTION phrase: + IF( erf, ne_op, integer_zero_node ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down GOTO error->INTO for_argv") + SHOW_PARSE_LABEL(" ", error) + } + gg_append_statement( error->structs.arith_error->into.go_to ); + } + ELSE + { + } + ENDIF + } + if( not_error ) + { + // There is an NOT ON EXCEPTION phrase: + IF( erf, eq_op, integer_zero_node ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down GOTO not_error->INTO for_argv") + SHOW_PARSE_LABEL(" ", not_error) + } + gg_append_statement( not_error->structs.arith_error->into.go_to ); + } + ELSE + { + } + ENDIF + } + } + if( error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down LABEL error->bottom") + SHOW_PARSE_LABEL(" ", error) + } + gg_append_statement( error->structs.arith_error->bottom.label ); + } + if( not_error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down LABEL not_error->bottom") + SHOW_PARSE_LABEL(" ", not_error) + SHOW_PARSE_END + } + gg_append_statement( not_error->structs.arith_error->bottom.label ); + } + } + +void +parser_accept_command_line_count( cbl_refer_t tgt ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + gg_call( VOID, + "__gg__get_argc", + gg_get_address_of(tgt.field->var_decl_node), + refer_offset_dest(tgt), + refer_size_dest(tgt), + NULL_TREE); + } + +void +parser_accept_envar(struct cbl_refer_t tgt, + struct cbl_refer_t envar, + cbl_label_t *error, + cbl_label_t *not_error ) + { + Analyze(); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( error ) + { + SHOW_PARSE_LABEL(" error ", error) + } + if( not_error ) + { + SHOW_PARSE_LABEL(" not_error ", not_error) + } + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + static tree erf = gg_define_variable(INT, "..pae_erf", vs_file_static); + + gg_assign(erf, + gg_call_expr( INT, + "__gg__accept_envar", + gg_get_address_of(tgt.field->var_decl_node), + refer_offset_dest(tgt), + refer_size_dest(tgt), + gg_get_address_of(envar.field->var_decl_node), + refer_offset_source(envar), + refer_size_source(envar), + NULL_TREE)); + if( error ) + { + // There is an ON EXCEPTION phrase: + IF( erf, ne_op, integer_zero_node ) + { + gg_append_statement( error->structs.arith_error->into.go_to ); + } + ELSE + { + } + ENDIF + } + if( not_error ) + { + // There is an NOT ON EXCEPTION phrase: + IF( erf, eq_op, integer_zero_node ) + { + gg_append_statement( not_error->structs.arith_error->into.go_to ); + } + ELSE + { + } + ENDIF + } + if( error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down LABEL error->bottom") + SHOW_PARSE_LABEL(" ", error) + } + gg_append_statement( error->structs.arith_error->bottom.label ); + } + if( not_error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Laying down LABEL not_error->bottom") + SHOW_PARSE_LABEL(" ", not_error) + SHOW_PARSE_END + } + gg_append_statement( not_error->structs.arith_error->bottom.label ); + } + } + +void +parser_set_envar( struct cbl_refer_t name, struct cbl_refer_t value ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + // Set name to value using setenv(3) + gg_call(BOOL, + "__gg__set_envar", + gg_get_address_of(name.field->var_decl_node), + refer_offset_source(name), + refer_size_source(name), + gg_get_address_of(value.field->var_decl_node), + refer_offset_source(value), + refer_size_source(value), + NULL_TREE); + } + +void +parser_accept_date_yymmdd( struct cbl_field_t *target ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + CHECK_FIELD(target); + + tree pointer = gg_define_char_star(); + gg_assign(pointer, gg_call_expr(CHAR_P, + "__gg__get_date_yymmdd", + NULL_TREE)); + gg_default_qualification(target); + move_tree_to_field( target, + pointer); + + gg_free(pointer); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("", target, "") + TRACE1_END + } + } + +void +parser_accept_date_yyyymmdd( struct cbl_field_t *target ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + CHECK_FIELD(target); + + tree pointer = gg_define_char_star(); + gg_assign(pointer, gg_call_expr(CHAR_P, + "__gg__get_date_yyyymmdd", + NULL_TREE)); + gg_default_qualification(target); + move_tree_to_field( target, + pointer); + + gg_free(pointer); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("", target, "") + TRACE1_END + } + } + +void +parser_accept_date_yyddd( struct cbl_field_t *target ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + CHECK_FIELD(target); + + tree pointer = gg_define_char_star(); + gg_assign(pointer, gg_call_expr(CHAR_P, + "__gg__get_date_yyddd", + NULL_TREE)); + gg_default_qualification(target); + move_tree_to_field( target, + pointer); + + gg_free(pointer); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("", target,""); + TRACE1_END + } + } + +void +parser_accept_date_yyyyddd( struct cbl_field_t *target ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + CHECK_FIELD(target); + + tree pointer = gg_define_char_star(); + gg_assign(pointer, gg_call_expr(CHAR_P, + "__gg__get_yyyyddd", + NULL_TREE)); + gg_default_qualification(target); + move_tree_to_field( target, + pointer); + + gg_free(pointer); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("", target, "") + TRACE1_END + } + } + +void +parser_accept_date_dow( struct cbl_field_t *target ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + CHECK_FIELD(target); + + tree pointer = gg_define_char_star(); + gg_assign(pointer, gg_call_expr(CHAR_P, + "__gg__get_date_dow", + NULL_TREE)); + gg_default_qualification(target); + move_tree_to_field( target, + pointer); + + gg_free(pointer); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("", target, "") + TRACE1_END + } + } + +void +parser_accept_date_hhmmssff( struct cbl_field_t *target ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + CHECK_FIELD(target); + + tree pointer = gg_define_char_star(); + gg_assign(pointer, gg_call_expr(CHAR_P, + "__gg__get_date_hhmmssff", + NULL_TREE)); + gg_default_qualification(target); + move_tree_to_field( target, + pointer); + + gg_free(pointer); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("", target, "") + TRACE1_END + } + } + +/* + * If the encoding is anything but custom, the enumerated type + * cbl_encoding_t suffices to describe it. At least for now, the rest + * of cbl_alphabet_t in those cases is unused. + * + * To get the symbol index: symbol_index(symbol_elem_of(&alphabet)) + * + * The parameter is always a reference to an element in the symbol table. + */ +void +parser_alphabet( cbl_alphabet_t& alphabet ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + fprintf(stderr, "%s\n", alphabet.name); + switch(alphabet.encoding) + { + case ASCII_e: + fprintf(stderr, "ASCII\n"); + break; + case iso646_e: + fprintf(stderr, "ISO646\n"); + break; + case EBCDIC_e: + fprintf(stderr, "EBCDIC\n"); + break; + case custom_encoding_e: + fprintf(stderr, "%s\n", alphabet.name); + break; + } + SHOW_PARSE_END + } + + size_t alphabet_index = symbol_index(symbol_elem_of(&alphabet)); + + switch(alphabet.encoding) + { + case ASCII_e: + case iso646_e: + case EBCDIC_e: + break; + + case custom_encoding_e: + { + unsigned char ach[256]; + + tree table_type = build_array_type_nelts(UCHAR, 256); + tree table256 = gg_define_variable(table_type); + for( int i=0; i<256; i++ ) + { + // character i has the ordinal alphabet[i] + unsigned char ch = ascii_to_internal(i); + + ach[ch] = (alphabet.alphabet[i]); + gg_assign( gg_array_value(table256, ch), + build_int_cst_type(UCHAR, (alphabet.alphabet[i])) ); + } + __gg__alphabet_create(alphabet.encoding, + alphabet_index, + ach, + alphabet.low_index, + alphabet.high_index); + gg_call(VOID, + "__gg__alphabet_create", + build_int_cst_type(INT, alphabet.encoding), + build_int_cst_type(SIZE_T, alphabet_index), + gg_get_address_of(table256), + build_int_cst_type(INT, alphabet.low_index), + build_int_cst_type(INT, alphabet.high_index), + NULL_TREE ); + break; + } + } + } + +void +parser_alphabet_use( cbl_alphabet_t& alphabet ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + switch(alphabet.encoding) + { + case ASCII_e: + fprintf(stderr, "ASCII\n"); + break; + case iso646_e: + fprintf(stderr, "ISO646\n"); + break; + case EBCDIC_e: + fprintf(stderr, "EBCDIC\n"); + break; + case custom_encoding_e: + fprintf(stderr, "%s\n", alphabet.name); + break; + } + SHOW_PARSE_END + } + + size_t alphabet_index = symbol_index(symbol_elem_of(&alphabet)); + + switch(alphabet.encoding) + { + case ASCII_e: + case iso646_e: + case EBCDIC_e: + __gg__low_value_character = DEGENERATE_LOW_VALUE; + __gg__high_value_character = DEGENERATE_HIGH_VALUE; + gg_call(VOID, + "__gg__alphabet_use", + build_int_cst_type(INT, alphabet.encoding), + null_pointer_node, + NULL_TREE); + break; + + case custom_encoding_e: + std::unordered_map::const_iterator it = + __gg__alphabet_states.find(alphabet_index); + + assert( it != __gg__alphabet_states.end()); + __gg__low_value_character = it->second.low_char; + __gg__high_value_character = it->second.high_char; + + gg_call(VOID, + "__gg__alphabet_use", + build_int_cst_type(INT, alphabet.encoding), + build_int_cst_type(SIZE_T, alphabet_index), + NULL_TREE); + break; + } + } + +void +parser_display_literal(const char *literal, bool advance) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" \""); + SHOW_PARSE_TEXT(literal) + SHOW_PARSE_TEXT("\""); + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("About to DISPLAY a literal:") + TRACE1_END + } + + tree file_descriptor = integer_one_node; // Just stdout, for now + gg_write( file_descriptor, + gg_string_literal(literal), + build_int_cst_type(integer_type_node,(int)strlen(literal)) ); + + if( advance ) + { + gg_write( file_descriptor, + gg_string_literal("\n"), + integer_one_node); + } + cursor_at_sol = advance; + } + +void +parser_display_internal(tree file_descriptor, + cbl_refer_t refer, + bool advance) + { + Analyze(); + if( refer.field->type == FldConditional ) + { + TRACE1 + { + gg_create_true_false_statement_lists(refer.field->var_decl_node); + gg_fprintf(file_descriptor, 0, "TRUE"); + ELSE + gg_fprintf(file_descriptor, 0, "FALSE"); + ENDIF + } + } + else if( refer.field->type == FldLiteralA ) + { + gg_call(VOID, + "__gg__display_string", + file_descriptor, + build_string_literal(refer.field->data.capacity, + refer.field->data.initial), + build_int_cst_type(SIZE_T, refer.field->data.capacity), + advance ? integer_one_node : integer_zero_node, + NULL_TREE ); + cursor_at_sol = advance; + } + else if( refer.field->type == FldLiteralN ) + { + // The parser found the string of digits from the source code and converted + // it to a _Float128. + + // The bad news is that something like 555.55 can't be expressed exactly; + // internally it is 555.5499999999.... + + // The good news is that we know any string of 33 or fewer digits is + // converted to _Float128 and then converted back again, you get the same + // string. + + // We make use of that here + + char ach[128]; + strfromf128(ach, sizeof(ach), "%.33E", refer.field->data.value); + char *p = strchr(ach, 'E'); + if( !p ) + { + // Probably INF -INF NAN or -NAN, so ach has our result + } + else + { + p += 1; + int exp = atoi(p); + if( exp >= 6 || exp <= -5 ) + { + // We are going to stick with the E notation, so ach has our result + } + else + { + int precision = 32 - exp; + char achFormat[24]; + sprintf(achFormat, "%%.%df", precision); + strfromf128(ach, sizeof(ach), achFormat, refer.field->data.value); + } + __gg__remove_trailing_zeroes(ach); + } + + if( symbol_decimal_point() == ',' ) + { + char *p = strchr(ach, '.' ); + if( p ) + { + *p = symbol_decimal_point(); + } + } + + gg_write( file_descriptor, + gg_string_literal(ach), + build_int_cst_type(SIZE_T, strlen(ach))); + if( advance ) + { + gg_write( file_descriptor, + gg_string_literal("\n"), + integer_one_node); + } + } + else + { + if( refer_is_clean(refer) ) + { + gg_call(VOID, + "__gg__display_clean", + gg_get_address_of(refer.field->var_decl_node), + file_descriptor, + advance ? integer_one_node : integer_zero_node, + NULL_TREE ); + } + else + { + // We might be dealing with a refmod: + if( refer.refmod.from || refer.refmod.len ) + { + gg_attribute_bit_set(refer.field, refmod_e); + } + gg_call(VOID, + "__gg__display", + gg_get_address_of(refer.field->var_decl_node), + refer_offset_source(refer), + refer_size_source( refer), + file_descriptor, + advance ? integer_one_node : integer_zero_node, + NULL_TREE ); + if( refer.refmod.from || refer.refmod.len ) + { + gg_attribute_bit_clear(refer.field, refmod_e); + } + } + } + cursor_at_sol = advance; + } + +void +parser_display_field(cbl_field_t *field) + { + parser_display_internal_field(integer_one_node, + field, + DISPLAY_NO_ADVANCE); + } + +void +parser_display( const struct cbl_special_name_t *upon, + struct cbl_refer_t refs[], + size_t n, + bool advance ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" parser_display of multiple variables:") + for(size_t i=0; i 1) + { + gg_fprintf(trace_handle, 1, "%ld: ", build_int_cst_type(INT, ii)); + } + TRACE1_REFER("", refs[ii], "") + } + TRACE1_END + } + tree file_descriptor = gg_define_int(); + bool needs_closing = false; + if( upon ) + { + switch(upon->id) + { + case STDOUT_e: + case SYSOUT_e: + case SYSLIST_e: + case SYSLST_e: + case CONSOLE_e: + gg_assign(file_descriptor, integer_one_node); + break; + + case STDERR_e: + case SYSPUNCH_e: + case SYSPCH_e: + gg_assign(file_descriptor, integer_two_node); + break; + + default: + if( upon->os_filename[0] ) + { + tree topen = gg_open( gg_string_literal(upon->os_filename), + build_int_cst_type(INT, O_APPEND|O_WRONLY)); + gg_assign(file_descriptor, topen); + needs_closing = true; + } + else + { + fprintf(stderr, "We don't know what to do in parser_display\n"); + gcc_unreachable(); + } + } + } + else + { + gg_assign(file_descriptor,integer_one_node); // stdout is file descriptor 1. + } + + for(size_t i=0; idata.capacity, + var->attr & signable_e); + retval = gg_cast(var_type, var->data_decl_node); + return retval; + } + +void +parser_assign( size_t nC, cbl_num_result_t *C, + struct cbl_refer_t sourceref, + cbl_label_t *on_error, + cbl_label_t *not_error, + cbl_label_t *compute_error) + { + Analyze(); + RETURN_IF_PARSE_ONLY; + // There might, or might not, already be error and/or not_error labels: + set_up_on_exception_label(on_error); + set_up_on_exception_label(not_error); + set_up_compute_error_label(compute_error); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + } + + TRACE1 + { + TRACE1_HEADER + char ach[32]; + sprintf(ach, "%ld target%s", nC, nC==1 ? "" : "s"); + TRACE1_TEXT(ach); + if( on_error ) + { + TRACE1_TEXT("; with on_error"); + } + if( not_error ) + { + TRACE1_TEXT("; with not_error"); + } + } + + tree error_flag = gg_define_int(0); + + for(size_t i=0; istructs.compute_error->compute_error_code), NULL_TREE); + + static tree erf = gg_define_variable(INT, "..pa_erf", vs_file_static); + if( on_error ) + { + // There is an ON ERROR clause. When there is an ON ERROR clause, and + // there is an error, the TARGET values are to be left unchanged. + IF(compute_error->structs.compute_error->compute_error_code, + ne_op, + integer_zero_node ) + { + // There was an error, so we do NOT replace the destref with the + // sourceref value + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("on_error clause; computional error occurred") + } + } + ELSE + { + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("on_error clause; no computational error") + } + // There was no computational error. Call the move routine that does + // not replace the target when there is a size error: + TREEPLET tsource; + treeplet_fill_source(tsource, sourceref); + static bool check_for_error = true; + move_helper(erf, + destref, + sourceref, + tsource, + rounded, + check_for_error, + true); + + gg_assign(error_flag, gg_bitwise_or(error_flag, erf)); + IF(error_flag, ne_op, integer_zero_node) + { + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("on_error clause; a move error occurred") + } + // There was an error during the move. Set the exception status + // information: + gg_call( VOID, + "__gg__process_compute_error", + build_int_cst_type(INT, compute_error_truncate), + NULL_TREE); + // But because there is an ON ERROR clause, suppress DECLARATIVE + // processing + gg_assign(var_decl_exception_code, integer_zero_node); + } + ELSE + { + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("on_error clause; no move") + } + } + ENDIF + } + ENDIF + } + else + { + // There is no ON_ERROR clause, so we do the truncation type move, but + // with one exception. If the error was an exponentiation error that + // resulted in a NaN, we *don't* do the move: + + IF( gg_bitwise_and( compute_error->structs.compute_error->compute_error_code, + build_int_cst_type(INT, + compute_error_exp_minus_by_frac + | compute_error_divide_by_zero)), + ne_op, + integer_zero_node ) + { + // It was a NaN, so don't do the move + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("Not moving the NaN") + } + } + ELSE + { + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("Doing the move") + } + TREEPLET tsource; + treeplet_fill_source(tsource, sourceref); + static bool check_for_error = true; + move_helper(erf, + destref, + sourceref, + tsource, + rounded, + check_for_error, + false); + gg_assign(error_flag, gg_bitwise_or(error_flag, erf)); + IF(error_flag, ne_op, integer_zero_node) + { + // There was an error during the move. Set the exception status + // information: + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("Error during the move; calling __gg__process_compute_error") + } + gg_call( VOID, + "__gg__process_compute_error", + build_int_cst_type(INT, compute_error_truncate), + NULL_TREE); + } + ELSE + { + } + ENDIF + } + ENDIF + } + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("source ", sourceref.field, "") + TRACE1_INDENT + TRACE1_FIELD("dest ", destref.field, "") + TRACE1_END + } + } + + if( on_error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" Laying down on_error GOTO into") + SHOW_PARSE_LABEL(" ", on_error) + } + IF( gg_bitwise_or(error_flag, + compute_error->structs.compute_error->compute_error_code), + ne_op, + integer_zero_node ) + { + gg_append_statement( on_error->structs.arith_error->into.go_to ); + } + ELSE + ENDIF + } + else + { + // We weren't given an explicit ON SIZE ERROR label, so we need to go + // with the NO ERROR CLAUSE behavior + if( compute_error ) + { + gg_call( VOID, + "__gg__process_compute_error", + compute_error->structs.compute_error->compute_error_code, + NULL_TREE); + } + } + + if( not_error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" Laying down not_error GOTO into") + SHOW_PARSE_LABEL(" ", not_error) + } + IF( compute_error->structs.compute_error->compute_error_code, eq_op, integer_zero_node ) + { + gg_append_statement( not_error->structs.arith_error->into.go_to ); + } + ELSE + ENDIF + } + + if( on_error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" Laying down on_error LABEL BOTTOM:") + SHOW_PARSE_LABEL(" ", on_error) + } + gg_append_statement( on_error->structs.arith_error->bottom.label ); + } + + if( not_error ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" Laying down not_error LABEL BOTTOM:") + SHOW_PARSE_LABEL(" ", not_error) + } + gg_append_statement( not_error->structs.arith_error->bottom.label ); + } + + SHOW_PARSE + { + SHOW_PARSE_END + } + } + +static cbl_figconst_t +is_figconst(cbl_field_t *field) + { + cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK); + return figconst; + } + +static cbl_figconst_t +is_figconst(cbl_refer_t &sourceref) + { + return is_figconst(sourceref.field); + } + +void +parser_move(cbl_refer_t destref, + cbl_refer_t sourceref, + cbl_round_t rounded, + bool skip_fill_from // Defaults to false + ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( sourceref.field && is_figconst_low(sourceref.field) ) + { + SHOW_PARSE_TEXT(" LOW-VALUE") + } + else if( sourceref.field && is_figconst_zero(sourceref.field) ) + { + SHOW_PARSE_TEXT(" ZERO-VALUE") + } + else if( sourceref.field && is_figconst_space(sourceref.field) ) + { + SHOW_PARSE_TEXT(" SPACE-VALUE") + } + else if( sourceref.field && is_figconst_quote(sourceref.field) ) + { + SHOW_PARSE_TEXT(" QUOTE-VALUE") + } + else if( sourceref.field && is_figconst_high(sourceref.field) ) + { + SHOW_PARSE_TEXT(" HIGH-VALUE") + } + else + { + SHOW_PARSE_REF(" ", sourceref) + } + SHOW_PARSE_REF(" TO ", destref) + switch(rounded) + { + case away_from_zero_e: + SHOW_PARSE_TEXT(" AWAY_FROM_ZERO") + break; + case nearest_toward_zero_e: + SHOW_PARSE_TEXT(" NEAREST_TOWARD_ZERO") + break; + case toward_greater_e: + SHOW_PARSE_TEXT(" TOWARD_GREATER") + break; + case toward_lesser_e: + SHOW_PARSE_TEXT(" TOWARD_LESSER") + break; + case nearest_away_from_zero_e: + SHOW_PARSE_TEXT(" NEAREST_AWAY_FROM_ZERO") + break; + case nearest_even_e: + SHOW_PARSE_TEXT(" NEAREST_EVEN") + break; + case prohibited_e: + SHOW_PARSE_TEXT(" PROHIBITED") + break; + case truncation_e: + SHOW_PARSE_TEXT(" TRUNCATED") + break; + default: + gcc_unreachable(); + break; + } + SHOW_PARSE_END + } + + if( !skip_fill_from ) + { + cbl_figconst_t figconst = is_figconst(sourceref); + if( figconst ) + { + sourceref.all = true; + } + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("About to call move_helper") + } + TREEPLET tsource; + treeplet_fill_source(tsource, sourceref); + static bool dont_check_for_error = false; + move_helper(NULL, destref, sourceref, tsource, rounded, dont_check_for_error ); + + TRACE1 + { + TRACE1_INDENT + TRACE1_REFER_INFO("source ", sourceref) + TRACE1_INDENT + TRACE1_REFER_INFO("dest ", destref) + TRACE1_END + } + } + +static +void +parser_move_multi(cbl_refer_t destref, + cbl_refer_t sourceref, + TREEPLET tsource, + cbl_round_t rounded, + bool skip_fill_from ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( sourceref.field && is_figconst_low(sourceref.field) ) + { + SHOW_PARSE_TEXT(" LOW-VALUE") + } + else if( sourceref.field && is_figconst_zero(sourceref.field) ) + { + SHOW_PARSE_TEXT(" ZERO-VALUE") + } + else if( sourceref.field && is_figconst_space(sourceref.field) ) + { + SHOW_PARSE_TEXT(" SPACE-VALUE") + } + else if( sourceref.field && is_figconst_quote(sourceref.field) ) + { + SHOW_PARSE_TEXT(" QUOTE-VALUE") + } + else if( sourceref.field && is_figconst_high(sourceref.field) ) + { + SHOW_PARSE_TEXT(" HIGH-VALUE") + } + else + { + SHOW_PARSE_REF(" ", sourceref) + } + SHOW_PARSE_REF(" TO ", destref) + switch(rounded) + { + case away_from_zero_e: + SHOW_PARSE_TEXT(" AWAY_FROM_ZERO") + break; + case nearest_toward_zero_e: + SHOW_PARSE_TEXT(" NEAREST_TOWARD_ZERO") + break; + case toward_greater_e: + SHOW_PARSE_TEXT(" TOWARD_GREATER") + break; + case toward_lesser_e: + SHOW_PARSE_TEXT(" TOWARD_LESSER") + break; + case nearest_away_from_zero_e: + SHOW_PARSE_TEXT(" NEAREST_AWAY_FROM_ZERO") + break; + case nearest_even_e: + SHOW_PARSE_TEXT(" NEAREST_EVEN") + break; + case prohibited_e: + SHOW_PARSE_TEXT(" PROHIBITED") + break; + case truncation_e: + SHOW_PARSE_TEXT(" TRUNCATED") + break; + default: + gcc_unreachable(); + break; + } + SHOW_PARSE_END + } + + if( !skip_fill_from ) + { + cbl_figconst_t figconst = is_figconst(sourceref); + if( figconst ) + { + sourceref.all = true; + } + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("About to call move_helper") + } + + static bool dont_check_for_error = false; + move_helper(NULL, destref, sourceref, tsource, rounded, dont_check_for_error ); + + TRACE1 + { + TRACE1_INDENT + TRACE1_REFER_INFO("source ", sourceref) + TRACE1_INDENT + TRACE1_REFER_INFO("dest ", destref) + TRACE1_END + } + } + +void +parser_move(size_t ntgt, cbl_refer_t *tgts, cbl_refer_t src, cbl_round_t rounded) + { + if( mode_syntax_only() ) return; + + cbl_figconst_t figconst = is_figconst(src); + if( figconst ) + { + src.all = true; + } + TREEPLET tsource; + treeplet_fill_source(tsource, src); + static const bool skip_fill_from = true; + for( cbl_refer_t *p=tgts; p < tgts + ntgt; p++ ) + { + parser_move_multi(*p, src, tsource, rounded, skip_fill_from); + } + } + +/* + * "nelem" represents the number of elements in the table. + * "src" is the already-initialized first element of the table + * to be initialized. If nspan == 0, copy the whole record because + * the record either has no filler, or WITH FILLER was specified. + * Otherwise, the spans array comprises a set of {offset,end+1} pairs + * representing sequences of consecutive non-FILLER fields. + * + * "table" is the symbol table index for the table being initialized. + * It may appear in a subsequent call as part of the (sub)tbls array, + * if it is nested in a higher-level table. + */ +void +parser_initialize_table(size_t nelem, + cbl_refer_t src, + size_t nspan, + const cbl_bytespan_t spans[], + size_t table, // symbol table index + size_t ntbl, + const cbl_subtable_t tbls[]) + { + if( mode_syntax_only() ) return; + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER("src: ", src, " ") + TRACE1_END + } + typedef size_t span_t[2]; + static_assert(sizeof(spans[0]) == sizeof(span_t), "pair size wrong"); + static tree tspans = gg_define_variable(SIZE_T_P, "..pit_v1", vs_file_static); + static tree ttbls = gg_define_variable(SIZE_T_P, "..pit_v2", vs_file_static); + gg_assign(tspans, build_array_of_size_t(2*nspan, (const size_t *)spans)); + gg_assign(ttbls, build_array_of_size_t(2*ntbl, (const size_t *)tbls)); + + gg_call(VOID, + "__gg__mirror_range", + build_int_cst_type(SIZE_T, nelem), + gg_get_address_of(src.field->var_decl_node), + refer_offset_source(src), + build_int_cst_type(SIZE_T, nspan), + tspans, + build_int_cst_type(SIZE_T, table), + build_int_cst_type(SIZE_T, ntbl), + ttbls, + NULL_TREE); + + gg_free(tspans); + gg_free(ttbls); + } + +static +tree +tree_type_from_field_type(cbl_field_t *field, size_t &nbytes) + { + /* This routine is used to determine what action is taken with type of a + CALL ... USING and the matching PROCEDURE DIVISION USING of + a PROGRAM-ID or FUNCTION-ID + */ + tree retval = COBOL_FUNCTION_RETURN_TYPE; + nbytes = 8; + if( field ) + { + // This maps a Fldxxx to a C-style variable type: + switch(field->type) + { + case FldGroup: + case FldAlphanumeric: + case FldAlphaEdited: + case FldNumericEdited: + retval = CHAR_P; + nbytes = field->data.capacity; + break; + + case FldNumericDisplay: + case FldNumericBinary: + case FldPacked: + if( field->data.digits > 18 ) + { + retval = UINT128; + nbytes = 16; + } + else + { + retval = SIZE_T; + nbytes = 8; + } + break; + + case FldNumericBin5: + case FldIndex: + case FldPointer: + if( field->data.capacity > 8 ) + { + retval = UINT128; + nbytes = 16; + } + else + { + retval = SIZE_T; + nbytes = 8; + } + break; + + case FldFloat: + if( field->data.capacity == 8 ) + { + retval = DOUBLE; + nbytes = 8; + } + else if( field->data.capacity == 4 ) + { + retval = FLOAT; + nbytes = 4; + } + else + { + retval = FLOAT128; + nbytes = 16; + } + break; + + case FldLiteralN: + // Assume a 64-bit signed integer. This happens for GOBACK STATUS 101, + // the like + retval = LONG; + nbytes = 8; + break; + + default: + cbl_internal_error( "%s(): Invalid field type %s:", + __func__, + cbl_field_type_str(field->type)); + break; + } + } + if( retval == SIZE_T && field->attr & signable_e ) + { + retval = SSIZE_T; + } + if( retval == UINT128 && field->attr & signable_e ) + { + retval = INT128; + } + return retval; + } + +static void +restore_local_variables() + { + gg_call(VOID, + "__gg__pop_local_variables", + NULL_TREE); + gg_decrement(var_decl_unique_prog_id); + } + +static inline bool +is_valuable( cbl_field_type_t type ) { + switch ( type ) { + case FldInvalid: + case FldGroup: + case FldAlphanumeric: + case FldNumericEdited: + case FldAlphaEdited: + case FldLiteralA: + case FldClass: + case FldConditional: + case FldForward: + case FldSwitch: + case FldDisplay: + case FldBlob: + return false; + // These are variable types that have to be converted from their + // COBOL form to a little-endian binary representation so that they + // can be conveyed BY CONTENT/BY VALUE in a CALL or user-defined + // function activation. + case FldNumericDisplay: + case FldNumericBinary: + case FldFloat: + case FldPacked: + case FldNumericBin5: + case FldLiteralN: + case FldIndex: + case FldPointer: + return true; + } + cbl_internal_error( "%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type ); + return false; +} + +void parser_sleep(cbl_refer_t seconds) + { + if( seconds.field ) + { + gg_get_address_of(seconds.field->var_decl_node); + //refer_offset_source(seconds); + //refer_size_source(seconds); + + gg_call(VOID, + "__gg__sleep", + gg_get_address_of(seconds.field->var_decl_node), + refer_offset_source(seconds), + refer_size_source(seconds), + NULL_TREE); + } + else + { + // This is a naked place-holding CONTINUE. Generate some do-nothing + // code that will stick some .LOC information into the assembly language, + // so that GDB-COBOL can display the CONTINUE statement. + gg_assign(var_decl_nop, build_int_cst_type(INT, 103)); + } + } + +void +parser_exit_program(void) // exits back to COBOL only, else continue + { + static cbl_label_t this_program = {}; + static cbl_refer_t magic_refer(&this_program, false); + parser_exit( magic_refer ); + } + +/* + * If RETURNING was specified, the field is provided as an argument, no lookup + * necessary. refer.field == NULL means exit(0) unless ec != ec_none_e. + * If ec == ec_all_e, that indicates RAISING LAST EXCEPTION was used. + */ + +static +void +pe_stuff(cbl_refer_t refer, ec_type_t ec) + { + // This is the moral equivalent of a C "return xyz;". + + // There cannot be both a non-zero exit status and an exception condition. + gcc_assert( !(ec != ec_none_e && refer.field != NULL) ); + + gg_call(VOID, + "__gg__pseudo_return_flush", + NULL_TREE); + + cbl_field_t *returner = refer.field ? refer.field : current_function->returning; + + if( returner ) + { + cbl_field_type_t field_type = returner->type; + size_t nbytes = 0; + tree return_type = tree_type_from_field_type(returner, + nbytes); + tree retval = gg_define_variable(return_type); + + gg_assign(retval, gg_cast(return_type, integer_zero_node)); + + gg_modify_function_type(current_function->function_decl, + return_type); + + if( is_valuable( field_type ) ) + { + // The field being returned is numeric. + if( field_type == FldNumericBin5 + || field_type == FldFloat + || field_type == FldPointer + || field_type == FldIndex ) + { + // These are easily handled because they are all little-endian. + gg_memcpy(gg_get_address_of(retval), + member(returner, "data"), + build_int_cst_type( SIZE_T, + std::min(nbytes, (size_t)returner->data.capacity))); + } + else + { + // The field_type has a PICTURE string, so we need to convert from the + // COBOL form to little-endian binary: + tree value = gg_define_int128(); + get_binary_value( value, + NULL, + returner, + size_t_zero_node); + gg_memcpy(gg_get_address_of(retval), + gg_get_address_of(value), + build_int_cst_type(SIZE_T, nbytes)); + } + restore_local_variables(); + gg_return(retval); + } + else + { + // The RETURNING type is a group or alphanumeric + + // The byte array to be returned is in returning, which is a local + // variable on the stack. We need to make a copy of it to avoid the + // error of returning a pointer to data on the stack. + + tree array_type = build_array_type_nelts(UCHAR, + returner->data.capacity); + tree retval = gg_define_variable(array_type, vs_static); + gg_memcpy(gg_get_address_of(retval), + member(returner->var_decl_node, "data"), + member(returner->var_decl_node, "capacity")); + + tree actual = gg_cast(COBOL_FUNCTION_RETURN_TYPE, gg_get_address_of(retval)); + + restore_local_variables(); + gg_return(actual); + } + } + else + { + // There is no explicit value. This means, by default (according to) + // IBM), we return the value found in RETURN-CODE: + tree value = gg_define_variable(COBOL_FUNCTION_RETURN_TYPE); + gg_assign(value, + gg_cast(COBOL_FUNCTION_RETURN_TYPE, + var_decl_return_code)); + restore_local_variables(); + gg_return(gg_cast(COBOL_FUNCTION_RETURN_TYPE, value)); + } + } + +void +parser_exit( cbl_refer_t refer, ec_type_t ec ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( gg_trans_unit.function_stack.size() + && current_function->returning + && !refer.field) + { + // ->returning works only if there is no refer.field + SHOW_PARSE_FIELD(" RETURNING ", current_function->returning); + } + if( gg_trans_unit.function_stack.size() && refer.field ) + { + SHOW_PARSE_FIELD(" WITH STATUS ", refer.field); + } + if( gg_trans_unit.function_stack.size() && refer.prog_func ) + { + SHOW_PARSE_TEXT(" refer.prog_func is non-zero") + } + + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + if( refer.prog_func ) + { + // We are processing EXIT PROGRAM. If main() called us, we need to do + // nothing. Otherwise, this is a return + IF( current_function->called_by_main_counter, eq_op, integer_zero_node ) + { + // This function wasn't called by main, so we treat it like a GOBACK + pe_stuff(refer, ec); + } + ELSE + { + // This function was called by main. Is it the first call, or is it + // recursive? + IF( current_function->called_by_main_counter, gt_op, integer_one_node ) + { + // This was a recursive call into the function originally called by + // main. Because we are under the control of a calling program, we + // treat this like a GOBACK + pe_stuff(refer, ec); + } + ELSE + { + // We are not under the control of a calling program, meaning that we + // were called by main(). So, we do nothing, meaning we behave like + // a CONTINUE. + } + ENDIF + } + ENDIF + } + else + { + IF( current_function->called_by_main_counter, gt_op, integer_zero_node ) + { + // This wasn't an EXIT PROGRAM. But in the case where we are the program + // that was called by main(), we need to do some bookkeeping so that we + // respond properly to an EXIT PROGRAM should one appear + gg_decrement(current_function->called_by_main_counter); + } + ELSE + { + } + ENDIF + pe_stuff(refer, ec); + } + } + +static void +walk_initialization(cbl_field_t *field, bool initialized, bool deallocate) + { + if( !(field->attr & based_e) ) + { + // We are concerned only with BASED variables + return; + } + symbol_elem_t *e = symbol_at(field_index(field)); + bool first_time = true; + while( e < symbols_end() ) + { + symbol_elem_t& element = *e++; + if( element.type == SymField ) + { + cbl_field_t *this_one = cbl_field_of(&element); + if( !first_time ) + { + if( this_one->level == LEVEL01 || this_one->level == LEVEL77 ) + { + // Having encountered the next 01 or 77, we are done + break; + } + } + first_time = false; + if( this_one->level == 00 ) + { + // Ignore LEVEL00 "INDEXED BY" variables + continue; + } + if(deallocate) + { + gg_assign(member(this_one->var_decl_node, "data"), + gg_cast(UCHAR_P, null_pointer_node)); + } + else + { + gg_assign(member(this_one->var_decl_node, "data"), + gg_add(member(field->var_decl_node, "data"), + build_int_cst_type(SIZE_T, this_one->offset))); + if( this_one->level == 66 + || this_one->level == 88 + || symbol_redefines(this_one) ) + { + continue; + } + if( !initialized ) + { + // This is ALLOCATE Rule 9) in ISO 2023 + if( this_one->type == FldPointer ) + { + gg_memset(member(this_one->var_decl_node, "data"), + integer_zero_node, + build_int_cst_type(SIZE_T, this_one->data.capacity)); + } + } + } + } + } + } + +void +parser_allocate(cbl_refer_t size_or_based, + cbl_refer_t returning, + bool initialized ) + { + /* + * If the 1st parameter has based_e attribute, the field it is based on defines + * the number of bytes to allocate. In that case, "returning" is optional and + * may have a NULL field. Otherwise the 1st parameter is a numeric value and + * allocated space is assigned to "returning", which is of type FldPointer. + */ + + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_REF(" size_or_based from:", size_or_based) + SHOW_PARSE_INDENT + SHOW_PARSE_REF("returning: ", returning) + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER("size_or_based: ", size_or_based, ""); + TRACE1_INDENT + TRACE1_REFER("returning: ", size_or_based, ""); + TRACE1_END + } + + if( returning.field ) + { + // If there is a returning, it has to be a pointer + gcc_assert(returning.field->type == FldPointer); + } + + if( !(size_or_based.field->attr & based_e) ) + { + // If the first is not based, then there must be a returning + gcc_assert(returning.field); + } + + cbl_field_t *f_working = current_options().initial_working(); + cbl_field_t *f_local = current_options().initial_local(); + + int default_byte = wsclear() ? *wsclear() : -1; + + gg_call(VOID, + "__gg__allocate", + gg_get_address_of(size_or_based.field->var_decl_node), + refer_offset_source(size_or_based) , + initialized ? integer_one_node : integer_zero_node, + build_int_cst_type(INT, default_byte), + f_working ? gg_get_address_of(f_working->var_decl_node) : null_pointer_node, + f_local ? gg_get_address_of(f_local-> var_decl_node) : null_pointer_node, + returning.field ? gg_get_address_of(returning.field->var_decl_node) + : null_pointer_node, + returning.field ? refer_offset_source(returning) + : size_t_zero_node, + NULL_TREE); + walk_initialization(size_or_based.field, initialized, false); + } + +void +parser_free( size_t n, cbl_refer_t refers[] ) + { + if( mode_syntax_only() ) return; // Normally handled by SHOW_PARSE, if present + + Analyze(); + for( auto p = refers; p < refers + n; p++ ) + { + gcc_assert( ! p->all ); + gcc_assert( ! p->is_refmod_reference() ); + if( !(p->field->type == FldPointer || p->addr_of || (p->field->attr & based_e)) ) + { + dbgmsg("Deallocating %s means it has to be FldPointer or addr_of or based_e"); + } + gcc_assert( p->field->type == FldPointer || p->addr_of || (p->field->attr & based_e) ); + + gg_call(VOID, + "__gg__deallocate", + gg_get_address_of(p->field->var_decl_node), + refer_offset_source(*p), + p->addr_of ? integer_one_node : integer_zero_node, + NULL_TREE); + walk_initialization(p->field, false, true); + } + } + +void +parser_arith_error(cbl_label_t *arithmetic_label) + { + // We can't use Analyze() on this one, because the exit ends up being laid + // down before the enter when the goto logic gets untangled by the compiler. + + // We are entering either SIZE ERROR or NOT SIZE ERROR code + RETURN_IF_PARSE_ONLY; + set_up_on_exception_label(arithmetic_label); + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" Laying down GOTO OVER") + SHOW_PARSE_LABEL(" ", arithmetic_label) + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" Laying down LABEL INTO:") + SHOW_PARSE_LABEL(" ", arithmetic_label) + SHOW_PARSE_END + } + + // Jump over the [NOT] ON EXCEPTION code that is about to be laid down + gg_append_statement( arithmetic_label->structs.arith_error->over.go_to ); + // Create the label that allows the following code to be executed at + // when an ERROR, or NOT ERROR, has been determined to have taken place: + gg_append_statement( arithmetic_label->structs.arith_error->into.label ); + } + +void +parser_arith_error_end(cbl_label_t *arithmetic_label) + { + // We can't use Analyze() on this one, because the exit ends up being laid + // down before the enter when the goto logic gets untangled by the compiler. + + // We have reached the end of the ERROR, or NOT ERROR, code. + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" Laying down GOTO BOTTOM") + SHOW_PARSE_LABEL(" ", arithmetic_label) + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" Laying down LABEL OVER:") + SHOW_PARSE_LABEL(" ", arithmetic_label) + SHOW_PARSE_END + } + + // Jump to the end of the arithmetic code: + gg_append_statement( arithmetic_label->structs.arith_error->bottom.go_to ); + // Lay down the label that allows the ERROR/NOT ERROR instructions + // to exist in a lacuna that doesn't get executed unless somebody jumps + // to it: + gg_append_statement( arithmetic_label->structs.arith_error->over.label ); + } + +static void +propogate_linkage_offsets(cbl_field_t *field, tree base) + { + if( field->level == LEVEL01 || field->level == LEVEL77 ) + { + field->data_decl_node = base; + symbol_elem_t *e = symbol_at(field_index(field)); + // We already updated the data pointer of the first element: + e += 1; + while( e < symbols_end() ) + { + symbol_elem_t& element = *e++; + if( element.type == SymField ) + { + cbl_field_t *this_one = cbl_field_of(&element); + if( this_one->level == LEVEL01 || this_one->level == LEVEL77 ) + { + // We have encountered another level 01/77. If this LEVEL 01 had a + // parent, then we have to assume that this is a redefines of another + // level 01/77. + if( this_one->parent ) + { + // And, gloriously and frighteningly, it can be handled by + // recursion: + propogate_linkage_offsets(this_one, base); + } + else + { + // Having encountered the next 01 or 77, we are done + break; + } + } + if( this_one->level == 00 ) + { + // Ignore LEVEL00 "INDEXED BY" variables + continue; + } + tree offset = gg_define_variable(SIZE_T); + IF( base, eq_op, gg_cast(UCHAR_P, null_pointer_node) ) + { + gg_assign(offset, size_t_zero_node); + } + ELSE + { + gg_assign(offset, member(this_one, "offset")); + } + ENDIF + this_one->data_decl_node = base; + member( this_one, + "data", + gg_add(base, offset)); + } + } + } + } + +static bool initialized_data = false; +static void +initialize_the_data() + { + if( initialized_data ) + { + return; + } + initialized_data = true; + // Here is where we initialize the run-time list of currency symbols: + const char *default_currency = "$"; + + // This is one-time initialization of the libgcobol program state stack + gg_call(VOID, + "__gg__init_program_state", + NULL_TREE); + + __gg__currency_signs = __gg__ct_currency_signs; + // We initialize currency both at compile time and run time + __gg__currency_sign_init(); + gg_call(VOID, + "__gg__currency_sign_init", + NULL_TREE); + + gg_call(VOID, + "__gg__set_program_name", + gg_string_literal( current_filename.back().c_str() ), + NULL_TREE); + + for(int symbol=0; symbol<256; symbol++) + { + const char *sign = symbol_currency(symbol); + if( sign ) + { + default_currency = NULL; + + // Both compile-time and run-time + __gg__currency_sign(symbol, sign); + gg_call(VOID, + "__gg__currency_sign", + build_int_cst_type(INT, symbol), + build_string_literal(strlen(sign)+1, sign), + NULL_TREE); + } + } + if( default_currency ) + { + __gg__currency_sign(default_currency[0], default_currency); + gg_call(VOID, + "__gg__currency_sign", + char_nodes[(int)default_currency[0]], + gg_string_literal(default_currency), + NULL_TREE); + } + + // It's time to tell the library about DECIMAL-POINT IS COMMA: + if( symbol_decimal_point() == ',' ) + { + __gg__decimal_point = ascii_comma ; + __gg__decimal_separator = ascii_period ; + gg_call(VOID, + "__gg__decimal_point_is_comma", + NULL_TREE); + } + } + +void +parser_division(cbl_division_t division, + cbl_field_t *returning, + size_t nusing, + cbl_ffi_arg_t args[] ) + { + // This is called when the parser enters a COBOL program DIVISION. See + // parser_divide for the arithmetic operation. + + if( mode_syntax_only() ) return; + + // Do this before the SHOW_PARSE; it makes a little more sense when reviewing + // the SHOW_PARSE output. + if( division == identification_div_e ) + { + initialized_data = false; + if( gg_trans_unit.function_stack.size() >= 1 ) + { + // This is a nested program. So, we need to tie off the current + // section: + leave_paragraph_internal(); + leave_section_internal(); + } + } + + if( division == environment_div_e ) + { + initialized_data = false; + } + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + switch(division) + { + case identification_div_e: + SHOW_PARSE_TEXT("IDENTIFICATION") + break; + case environment_div_e: + SHOW_PARSE_TEXT("ENVIRONMENT") + break; + case data_div_e: + SHOW_PARSE_TEXT("DATA") + break; + case procedure_div_e: + SHOW_PARSE_TEXT("PROCEDURE") + break; + } + + SHOW_PARSE_END + } + + gg_set_current_line_number(CURRENT_LINE_NUMBER); + + if( division == data_div_e ) + { + Analyze(); + initialize_the_data(); + } + if( division == environment_div_e ) + { + Analyze(); + initialize_the_data(); + } + else if( division == procedure_div_e ) + { + Analyze(); + initialize_the_data(); + + // Do some symbol table index bookkeeping. current_program_index() is valid + // at this point in time: + current_function->our_symbol_table_index = current_program_index(); + + // We have some housekeeping to do to keep track of the list of functions + // accessible by us: + + // For every procedure, we need a variable that points to the list of + // available program names. + + // We need a pointer to the array of program names + char ach[2*sizeof(cbl_name_t)]; + sprintf(ach, + "..accessible_program_list_%ld", + current_function->our_symbol_table_index); + tree prog_list = gg_define_variable(build_pointer_type(CHAR_P), + ach, vs_file_static); + + // Likewise, we need a pointer to the array of pointers to functions: + tree function_type = + build_varargs_function_type_array( SIZE_T, + 0, // No parameters yet + NULL); // And, hence, no types + tree pointer_type = build_pointer_type(function_type); + tree constructed_array_type = build_array_type_nelts(pointer_type, 1); + sprintf(ach, + "..accessible_program_pointers_%ld", + current_function->our_symbol_table_index); + tree prog_pointers = gg_define_variable( + build_pointer_type(constructed_array_type), + ach, + vs_file_static); + gg_call(VOID, + "__gg__set_program_list", + build_int_cst_type(INT, current_function->our_symbol_table_index), + gg_get_address_of(prog_list), + gg_get_address_of(prog_pointers), + NULL_TREE); + + if( gg_trans_unit.function_stack.size() == 1 ) + { + gg_create_goto_pair(&label_list_out_goto, + &label_list_out_label); + gg_create_goto_pair(&label_list_back_goto, + &label_list_back_label); + gg_append_statement(label_list_out_goto); + gg_append_statement(label_list_back_label); + } + + tree globals_are_initialized = gg_declare_variable( INT, + "__gg__globals_are_initialized", + NULL, + vs_external_reference); + IF( globals_are_initialized, eq_op, integer_zero_node ) + { + // one-time initialization happens here + + // We need to establish the initial value of the UPSI-1 switch register + // We are using IBM's conventions: + // https://www.ibm.com/docs/en/zvse/6.2?topic=SSB27H_6.2.0/fa2sf_communicate_appl_progs_via_job_control.html + // UPSI 10000110 means that bits 0, 5, and 6 are on, which means that + // SW-0, SW-5, and SW-6 are on. + gg_call(VOID, + "__gg__set_initial_switch_value", + NULL_TREE); + + // And then flag one-time initialization as having been done. + gg_assign(globals_are_initialized, integer_one_node); + } + ELSE + ENDIF + + gg_append_statement(current_function->skip_init_label); + // This is where we check to see if somebody tried to cancel us + tree cancelled = gg_define_int(); + gg_assign(cancelled, + gg_call_expr( INT, + "__gg__is_canceled", + gg_cast(SIZE_T, + current_function->function_address), + NULL_TREE)); + IF( cancelled, ne_op, integer_zero_node ) + { + // Somebody flagged us for CANCEL, which means reinitialization, so we + // need to find the _INITIALIZE_PROGRAM section label. + + // gg_printf("Somebody wants to cancel %s\n", + // gg_string_literal(current_function->our_unmangled_name), + // NULL_TREE); + cbl_label_t *prog = cbl_label_of(symbol_at(current_program_index())); + size_t initializer_index = prog->initial_section; + cbl_label_t *initializer = cbl_label_of(symbol_at(initializer_index)); + parser_perform(initializer, true); // true means suppress nexting + } + ELSE + ENDIF + + // RETURNING variables are supposed to be in the linkage section, which + // means that we didn't assign any storage to them during + // parser_symbol_add(). We do that here. + + // returning also needs to behave like local storage, even though it is + // in linkage. + + // This counter is used to help keep track of local variables + gg_increment(var_decl_unique_prog_id); + if( returning ) + { + parser_local_add(returning); + current_function->returning = returning; + } + + // Stash the returning variables for use during parser_return() + current_function->returning = returning; + + if( gg_trans_unit.function_stack.size() == 1 ) + { + // We are entering a new top-level program, so we need to set + // RETURN-CODE to zero + gg_assign(var_decl_return_code, build_int_cst_type(SHORT, 0)); + } + + // The parameters passed to this program might be 64 bits or 128 bits in + // length. We establish those lengths based on the types of the target + // for each USING. + + for(size_t i=0; iname); + + size_t nbytes = 0; + tree par_type = tree_type_from_field_type(args[i].refer.field, nbytes); + if( par_type == FLOAT ) + { + par_type = SSIZE_T; + } + if( par_type == DOUBLE ) + { + par_type = SSIZE_T; + } + if( par_type == FLOAT128 ) + { + par_type = INT128; + } + chain_parameter_to_function(current_function->function_decl, par_type, ach); + } + + bool check_for_parameter_count = false; + + if( nusing ) + { + // During the call, we saved the parameter_count and an array of variable + // lengths. We need to look at those values if, and only if, one or more + // of our USING arguments has an OPTIONAL flag or if one of our targets is + // marked as VARYING. + for(size_t i=0; iattr & any_length_e ) + { + check_for_parameter_count = true; + break; + } + } + + if( check_for_parameter_count ) + { + IF( var_decl_call_parameter_signature, + eq_op, + gg_cast(CHAR_P, current_function->function_address) ) + { + // We know to use var_decl_call_parameter_count, so unflag this + // pointer to avoid problems in the ridiculous possibility of + // COBOL-A calls C_B calls COBOL_A + gg_assign(var_decl_call_parameter_signature, + gg_cast(CHAR_P, null_pointer_node)); + } + ELSE + { + // We were apparently called by a C routine, not a COBOL routine, so + // make sure we don't get shortchanged by a count left behind from an + // earlier COBOL call. + gg_assign(var_decl_call_parameter_count, + build_int_cst_type(INT, A_ZILLION)); + } + ENDIF + } + else + { + // None of our parameters require a count, so make sure we don't get + // bamboozled by a count left behind from an earlier COBOL call. + gg_assign(var_decl_call_parameter_count, + build_int_cst_type(INT, A_ZILLION)); + } + + // There are 'nusing' elements in the PROCEDURE DIVISION USING list. + + tree parameter; + tree rt_i = gg_define_int(); + for(size_t i=0; ivar_decl_node, "data"), + ne_op, + gg_cast(UCHAR_P, null_pointer_node) ) + { + gg_call(VOID, + "__gg__push_local_variable", + gg_get_address_of(args[i].refer.field->var_decl_node), + NULL_TREE); + } + ELSE + ENDIF + + tree base = gg_define_variable(UCHAR_P); + gg_assign(rt_i, build_int_cst_type(INT, i)); + //gg_printf("The rt_i counter is %d\n", rt_i, NULL_TREE); + IF( rt_i, lt_op , var_decl_call_parameter_count ) + { + if( i == 0 ) + { + // This is the first parameter. + parameter = DECL_ARGUMENTS(current_function->function_decl); + } + else + { + // These are subsequent parameters + parameter = TREE_CHAIN(parameter); + } + gg_assign(base, gg_cast(UCHAR_P, parameter)); + + IF( gg_call_expr( CHAR_P, + "getenv", + gg_string_literal("PARAMETERS_ON_ENTRY"), + NULL_TREE), + ne_op, + gg_cast(CHAR_P, null_pointer_node)); + { + gg_printf("parameter_on_entry: %s(): %d %p\n", + gg_string_literal(current_function->our_unmangled_name), + build_int_cst_type(INT, i+1), + base, + NULL_TREE); + } + ELSE + ENDIF + + if( args[i].refer.field->attr & any_length_e ) + { + //gg_printf("side channel 0x%lx\n", gg_array_value(var_decl_call_parameter_lengths, rt_i), NULL_TREE); + + // Get the length from the global lengths[] side channel. Don't + // forget to use the length mask on the table value. + gg_assign(member(args[i].refer.field->var_decl_node, "capacity"), + gg_array_value(var_decl_call_parameter_lengths, rt_i)); + } + } + ELSE + { + gg_assign(base, gg_cast(UCHAR_P, null_pointer_node)); + } + ENDIF + + // Arriving here means that we are processing an instruction like + // this: + // PROCEDURE DIVISION USING using[0] using[1] ... using using[nusing-1] + + // When __gg__call_parameter_count is equal to A_ZILLION, then this is + // an OTHER-TO-COBOL call and the var_decl_call_parameter_lengths array + // is not valid + + cbl_ffi_crv_t crv = args[i].crv; + cbl_field_t *new_var = args[i].refer.field; + + if( crv == by_value_e ) + { + switch(new_var->type) + { + case FldGroup: + case FldAlphanumeric: + case FldAlphaEdited: + case FldNumericEdited: + crv = by_reference_e; + break; + default: + break; + } + } + + if( crv == by_value_e ) + { + // 'parameter' is the 64-bit or 128-bit value that was placed on the stack + + size_t nbytes; + tree_type_from_field_type(new_var, nbytes); + tree parm = gg_define_variable(INT128); + + if( nbytes <= 8 ) + { + // Our input is a 64-bit number + if( new_var->attr & signable_e ) + { + IF( gg_bitwise_and( gg_cast(SIZE_T, base), + build_int_cst_type(SIZE_T, 0x8000000000000000ULL)), + ne_op, + gg_cast(SIZE_T, integer_zero_node) ) + { + // Our input is a negative number + gg_assign(parm, gg_cast(INT128, integer_minus_one_node)); + } + ELSE + { + // Our input is a positive number + gg_assign(parm, gg_cast(INT128, integer_zero_node)); + } + ENDIF + } + else + { + // This is a 64-bit positive number: + gg_assign(parm, gg_cast(INT128, integer_zero_node)); + } + } + // At this point, parm has been set to 0 or -1 + + gg_memcpy(gg_get_address_of(parm), + gg_get_address_of(base), + build_int_cst_type(SIZE_T, nbytes)); + + tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity); + tree data_decl_node = gg_define_variable( array_type, + NULL, + vs_static); + gg_assign( member(new_var->var_decl_node, "data"), + gg_get_address_of(data_decl_node) ); + + // And then move it into place + gg_call(VOID, + "__gg__assign_value_from_stack", + gg_get_address_of(new_var->var_decl_node), + parm, + NULL_TREE); + // We now have to handle an oddball situation. It's possible we are + // dealing with + // + // linkage section. + // 01 var1 + // 01 var2 redefines var1 + // + // If so, we have to give var2::data_pointer the same value as + // var1::data_pointer + // + cbl_field_t *next_var; + size_t our_index = symbol_index(symbol_elem_of(new_var)); + size_t next_index = our_index + 1; + // Look ahead in the symbol table for the next LEVEL01/77 + for(;;) + { + symbol_elem_t *e = symbol_at(next_index); + if( e->type != SymField ) + { + break; + } + next_var = cbl_field_of(e); + if( !next_var ) + { + break; + } + if( next_var->level == LEVEL01 || next_var->level == LEVEL77 ) + { + if( next_var->parent == our_index ) + { + gg_assign(member(next_var->var_decl_node, "data"), + member(new_var->var_decl_node, "data")); + } + break; + } + next_index += 1; + } + } + else + { + // 'parameter' is a reference, so it it becomes the data member of + // the cblc_field_t COBOL variable. + gg_assign(member(args[i].field()->var_decl_node, "data"), base); + + // We need to apply base + offset to the LINKAGE variable + // and all of its children + propogate_linkage_offsets( args[i].field(), base ); + } + } + } + + gg_call(VOID, + "__gg__pseudo_return_bookmark", + NULL_TREE); + + // The MODULE-NAME function requires a stack of program names. We push the + // name on here. The first character is a 'T' or an 'N', where 'N' means + // this is a nested program. + + if( gg_trans_unit.function_stack.size() > 1 ) + { + // This is a nested program + strcpy(ach, "N"); + } + else + { + // This is a top-level program: + strcpy(ach, "T"); + } + strcat(ach, current_function->our_unmangled_name); + gg_call(VOID, + "__gg__module_name_push", + gg_string_literal(ach), + NULL_TREE); + + IF( var_decl_main_called, ne_op, integer_zero_node ) + { + // We were just called by main: + gg_assign(var_decl_main_called, integer_zero_node); + gg_assign(current_function->called_by_main_counter, integer_one_node); + } + ELSE + { + // This isn't a call from main(), but it might be a recursive call to the + // function that was called by main: + IF(current_function->called_by_main_counter, ne_op, integer_zero_node) + { + // In that case, we bump the counter to keep track of things. + gg_increment(current_function->called_by_main_counter); + } + ELSE + { + } + ENDIF + } + ENDIF + } + } + +void +parser_logop( struct cbl_field_t *tgt, + struct cbl_field_t *a, // Is NULL for single-valued ops + enum logop_t logop, + struct cbl_field_t *b ) + { + Analyze(); + SHOW_PARSE + { + if( logop == true_op) + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_TEXT(" will be set to TRUE ") + } + else if( logop == false_op) + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_TEXT(" will be set to FALSE ") + } + else + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_TEXT(" = ") + if( a ) + { + SHOW_PARSE_FIELD("", a) + } + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT( cbl_logop_str(logop) ) + if( b ) + { + SHOW_PARSE_FIELD(" ", b) + } + } + SHOW_PARSE_END + } + + CHECK_FIELD(tgt); + switch(logop) + { + case and_op: + case or_op: + case xor_op: + case xnor_op: + case not_op: + CHECK_FIELD(b); + break; + default: + break; + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("operation: ", cbl_logop_str(logop), "") + TRACE1_END + if( logop != true_op ) + { + if( a ) + { + TRACE1_INDENT + TRACE1_FIELD("operand A: ", a, ""); + } + TRACE1_INDENT + if( b ) + { + TRACE1_FIELD("operand B: ", b, ""); + } + TRACE1_END + } + } + + switch(logop) + { + case and_op: + case or_op: + case xor_op: + case xnor_op: + CHECK_FIELD(a); + break; + default: + break; + } + + // This routine takes two conditionals and a logical operator. From those, + // it creates and returns another conditional: + + if( tgt->type != FldConditional ) + { + cbl_internal_error("parser_logop() was called with variable %s on line %d" + ", which is not a FldConditional\n", + tgt->name, cobol_location().first_line); + } + if( a && a->type != FldConditional ) + { + cbl_internal_error("parser_logop() was called with variable %s on line %d" + ", which is not a FldConditional\n", + a->name, cobol_location().first_line); + } + if( b && b->type != FldConditional ) + { + cbl_internal_error("parser_logop() was called with variable %s on line %d" + ", which is not a FldConditional\n", + b->name, cobol_location().first_line); + } + + switch( logop ) + { + case and_op: + gg_assign(tgt->var_decl_node, gg_build_logical_expression( + a->var_decl_node, + and_op, + b->var_decl_node)); + break; + + case or_op: + gg_assign(tgt->var_decl_node, gg_build_logical_expression( + a->var_decl_node, + or_op, + b->var_decl_node)); + break; + + case not_op: + gg_assign(tgt->var_decl_node, gg_build_logical_expression( + NULL, + not_op, + b->var_decl_node)); + break; + + case xor_op: + gg_assign(tgt->var_decl_node, gg_build_logical_expression( + a->var_decl_node, + xor_op, + b->var_decl_node)); + break; + + case xnor_op: + { + gg_assign( tgt->var_decl_node, + gg_build_logical_expression(a->var_decl_node, + xor_op, + b->var_decl_node)); + + // I need to negate the result. + + gg_assign(tgt->var_decl_node, gg_build_logical_expression( + NULL, + not_op, + tgt->var_decl_node)); + } + break; + + case true_op: + gg_assign(tgt->var_decl_node, boolean_true_node); + break; + + case false_op: + gg_assign(tgt->var_decl_node, boolean_false_node); + break; + } + + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT_ABC("result: ", tgt->name, "") + TRACE1_FIELD_VALUE("", tgt, "") + TRACE1_END + } + } + +void +parser_relop( cbl_field_t *tgt, + cbl_refer_t aref, + enum relop_t relop, + cbl_refer_t bref ) + { + Analyze(); + cbl_field_t *a = aref.field, *b = bref.field; + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_REF(" = ", aref) + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(relop_str(relop)) + SHOW_PARSE_REF(" ", bref) + SHOW_PARSE_END + } + + CHECK_FIELD(tgt); + CHECK_FIELD(a); + CHECK_FIELD(b); + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("operation: ", relop_str(relop), "") + TRACE1_INDENT + TRACE1_REFER("operand A: ", aref, ""); + TRACE1_INDENT + TRACE1_REFER("operand B: ", bref, ""); + } + + // This routine builds the relational expression and returns the TREE as + // a conditional: + + if( tgt->type != FldConditional ) + { + cbl_internal_error("parser_relop() was called with variable %s, " + "which is not a FldConditional\n", + tgt->name); + } + + static tree comp_res = gg_define_variable(INT, "..pr_comp_res", vs_file_static); + cobol_compare(comp_res, aref, bref); + + // comp_res is negative, zero, position for less-than, equal-to, greater-than + + // So, we simply compare the result of the comparison to zero using the relop + // we were given to turn it into a TRUE/FALSE + gg_assign( tgt->var_decl_node, + gg_build_relational_expression( comp_res, + relop, + integer_zero_node)); + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_relop_long(cbl_field_t *tgt, + long avalue, + enum relop_t relop, + cbl_refer_t bref ) + { + Analyze(); + // We are comparing a long to a field, so the field had best be numerical + + cbl_field_t *b = bref.field; + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_TEXT(" = ") + SHOW_PARSE_TEXT(relop_str(relop)) + SHOW_PARSE_REF(" ", bref) + SHOW_PARSE_END + } + + CHECK_FIELD(tgt); + CHECK_FIELD(b); + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("operation: ", relop_str(relop), "") + TRACE1_INDENT + char ach[32]; + sprintf(ach, "operand A: %ld (long value) ", avalue); + TRACE1_TEXT(ach); + TRACE1_INDENT + TRACE1_REFER("operand B: ", bref, ""); + } + + // This routine builds the relational expression and returns the TREE as + // a conditional: + + if( tgt->type != FldConditional ) + { + cbl_internal_error("parser_relop() was called with variable %s, " + "which is not a FldConditional\n", + tgt->name); + } + + tree tree_a = build_int_cst_type(LONG, avalue); + static tree tree_b = gg_define_variable(LONG, "..prl_tree_b", vs_file_static); + get_binary_value( tree_b, + NULL, + bref.field, + refer_offset_source(bref) ); + + static tree comp_res = gg_define_variable(LONG, "..prl_comp_res", vs_file_static); + gg_assign(comp_res, gg_subtract(tree_a, tree_b)); + + // comp_res is negative, zero, position for less-than, equal-to, greater-than + + // So, we simply compare the result of the comparison to zero using the relop + // we were given to turn it into a TRUE/FALSE + gg_assign( tgt->var_decl_node, + gg_build_relational_expression( comp_res, + relop, + gg_cast(LONG, integer_zero_node))); + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_if( struct cbl_field_t *conditional ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", conditional) + SHOW_PARSE_END + } + + CHECK_FIELD(conditional); + + if( conditional->type != FldConditional ) + { + cbl_internal_error("parser_if() was called with variable %s, " + "which is not a FldConditional\n", + conditional->name); + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("testing: ") + TRACE1_TEXT(conditional->name) + TRACE1_FIELD_VALUE("", conditional, "") + TRACE1_END + } + + gg_create_true_false_statement_lists(conditional->var_decl_node); + } + +// The following routines border on abuse of the preprocessor, if not the +// programmer who is trying to understand this. Look at the #defines in +// gengen.h, and check out the comments for gg_if in gengen.c + +void +parser_else(void) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + ELSE + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("taking FALSE branch") + TRACE1_END + } + } + +void +parser_fi(void) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + ENDIF + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + } + +void +parser_see_stop_run(struct cbl_refer_t exit_status, + const char *message) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( exit_status.field ) + { + SHOW_PARSE_FIELD(" ERROR STATUS ", exit_status.field); + } + SHOW_PARSE_END + } + if( message ) + { + parser_display_literal(message, DISPLAY_ADVANCE); + } + TRACE1 + { + TRACE1_HEADER + } + + // It's a stop run. Return return-code to the operating system: + static tree returned_value = gg_define_variable(INT, "..pssr_retval", vs_file_static); + + if( exit_status.field ) + { + // There is an exit_status, so it wins: + get_binary_value( returned_value, + NULL, + exit_status.field, + refer_offset_source(exit_status)); + TRACE1 + { + TRACE1_REFER(" exit_status ", exit_status, "") + } + } + else + { + gg_assign(returned_value, gg_cast(INT, var_decl_return_code)); + TRACE1 + { + gg_fprintf( trace_handle, + 2, + "RETURN-CODE %s [%d]", + gg_string_literal(cbl_field_of( + symbol_at(return_code_register()))->name), + returned_value); + } + } + TRACE1 + { + gg_printf(" gg_exit(%d)\n", returned_value, NULL_TREE); + TRACE1_END + } + gg_exit(returned_value); + } + +static +cbl_label_addresses_t * +label_fetch(struct cbl_label_t *label) + { + if( !label->structs.goto_trees ) + { + label->structs.goto_trees + = (cbl_label_addresses_t *)xmalloc(sizeof(struct cbl_label_addresses_t) ); + + gg_create_goto_pair(&label->structs.goto_trees->go_to, + &label->structs.goto_trees->label); + } + return label->structs.goto_trees; + } + +void +parser_label_label(struct cbl_label_t *label) + { + label->lain = yylineno; + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL("", label) + char ach[32]; + sprintf(ach, " label is at %p", label); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " label->proc is %p", label->structs.proc); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + + CHECK_LABEL(label); + + TRACE1 + { + TRACE1_HEADER + TRACE1_LABEL("Establish label: ", label, "") + TRACE1_END + } + + if(strcmp(label->name, "_end_declaratives") == 0 ) + { + suppress_cobol_entry_point = false; + } + gg_append_statement( label_fetch(label)->label ); + } + +void +parser_label_goto(struct cbl_label_t *label) + { + label->used = yylineno; + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL(" ", label) + char ach[32]; + sprintf(ach, " label is at %p", label); + SHOW_PARSE_TEXT(ach) + sprintf(ach, " label->proc is %p", label->structs.proc); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + + CHECK_LABEL(label); + + TRACE1 + { + TRACE1_HEADER + TRACE1_LABEL("GOTO label: ", label, "") + TRACE1_END + } + + if(strcmp(label->name, "_end_declaratives") == 0 ) + { + suppress_cobol_entry_point = true; + } + + gg_append_statement( label_fetch(label)->go_to ); + } + +void +parser_setop( struct cbl_field_t *tgt, + struct cbl_field_t *candidate, + enum setop_t op, + struct cbl_field_t *domain) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_FIELD(" = ", candidate) + if( op == is_op ) + { + SHOW_PARSE_TEXT(" is_op ") + } + SHOW_PARSE_FIELD(" = ", domain) + SHOW_PARSE_END + } + + CHECK_FIELD(tgt); + CHECK_FIELD(candidate); + CHECK_FIELD(domain); + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("parser_setop: ", candidate, "") + TRACE1_TEXT(" ") + TRACE1_TEXT(setop_str(op)) + TRACE1_FIELD(" ", domain, "") + TRACE1_END + } + + gcc_assert(tgt->type == FldConditional); + gcc_assert(domain->data.initial); + gcc_assert(strlen(domain->data.initial)); + + switch(op) + { + case is_op: + switch(candidate->type) + { + case FldGroup: + case FldAlphanumeric: + gg_assign(tgt->var_decl_node, gg_build_relational_expression( + gg_call_expr(INT, + "__gg__setop_compare", + member(candidate, "data"), + member(candidate, "capacity"), + member(domain, "initial"), + NULL_TREE), + ne_op, + integer_zero_node)); + break; + default: + dbgmsg("###### %10s in %s:%d\n", __func__, __FILE__, __LINE__ ); + cbl_internal_error( + "###### candidate %s has unimplemented CVT_type %d(%s)\n", + candidate->name, + candidate->type, + cbl_field_type_str(candidate->type)); + gcc_unreachable(); + break; + } + break; + + default: + dbgmsg("###### %10s in %s:%d\n", __func__, __FILE__, __LINE__ ); + cbl_internal_error("###### unknown setop_t code %d\n", op); + gcc_unreachable(); + break; + } + } + +void +parser_classify( cbl_field_t *tgt, + cbl_refer_t candidate, + enum classify_t type ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + SHOW_PARSE_FIELD(" = ", candidate.field) + SHOW_PARSE_TEXT(" IS ") + SHOW_PARSE_TEXT(classify_str(type)) + SHOW_PARSE_END + } + + gcc_assert(tgt->type == FldConditional); + + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER_VALUE("parser_classify: ", candidate, "") + TRACE1_TEXT(" ") + TRACE1_TEXT(classify_str(type)) + } + + gg_assign(tgt->var_decl_node, gg_build_relational_expression( + gg_call_expr(INT, + "__gg__classify", + build_int_cst_type(INT, type), + gg_get_address_of(candidate.field->var_decl_node), + refer_offset_dest(candidate), + refer_size_dest(candidate), + NULL_TREE), + ne_op, + integer_zero_node)); + + TRACE1 + { + TRACE1_TEXT(" result is ") + TRACE1_TEXT(tgt->name) + TRACE1_FIELD_VALUE(" -> ", tgt, "") + TRACE1_END + } + } + +void +parser_perform(struct cbl_perform_tgt_t *tgt, struct cbl_refer_t how_many) + { + cbl_field_t *N = how_many.field; + // No SHOW_PARSE here; we want to fall through: + if( !tgt->to() ) + { + // We only have tgt->from. + if( !N ) + { + // There is no N. This is a simple PERFORM proc-1 + parser_perform(tgt->from()); + } + else + { + // This is a PERFORM proc-1 N TIMES + parser_perform_times(tgt->from(), how_many); + } + } + else + { + // We have both from and to + if( !N ) + { + // There is no N. This is PERFORM proc-1 THROUGH proc-2 + // false means nexting in GDB will work + internal_perform_through(tgt->from(), tgt->to(), false); + } + else + { + // This is a PERFORM proc-1 THROUGH proc-2 N TIMES + internal_perform_through_times(tgt->from(), tgt->to(), how_many); + } + } + } + +static void +create_iline_address_pairs(struct cbl_perform_tgt_t *tgt) + { + gg_create_goto_pair(&tgt->addresses.top.go_to, + &tgt->addresses.top.label); + + gg_create_goto_pair(&tgt->addresses.exit.go_to, + &tgt->addresses.exit.label); + + gg_create_goto_pair(&tgt->addresses.test.go_to, + &tgt->addresses.test.label); + + gg_create_goto_pair(&tgt->addresses.testA.go_to, + &tgt->addresses.testA.label); + + gg_create_goto_pair(&tgt->addresses.setup.go_to, + &tgt->addresses.setup.label); + + // Even in -O0 compilations, the compiler does some elementary optimizations + // around JMP instructions. We have the SETUP code for in-line performats + // in an island at the end of the loop code. With this intervention, NEXTing + // through the code shows you the final statement of the loop before the + // loop actually starts. + + tgt->addresses.line_number_of_setup_code = gg_get_current_line_number(); + } + +void +parser_perform_start( struct cbl_perform_tgt_t *tgt ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( tgt ) + { + SHOW_PARSE_TEXT(" cbl_perform_tgt_t is at") + char ach[32]; + sprintf(ach, " %p", tgt); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_LABEL(" ", tgt->from()) + if( tgt->to() ) + { + SHOW_PARSE_LABEL(" ", tgt->to()) + } + } + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + if( tgt->from() ) + { + TRACE1_LABEL(" from ", tgt->from(), "") + } + if( tgt->to() ) + { + TRACE1_LABEL(" to ", tgt->to(), "") + } + TRACE1_END + } + + // Create the goto/label pairs we are going to be needing: + create_iline_address_pairs(tgt); + + // From here we have to jump to the loop setup code: + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("GOTO SETUP") + SHOW_PARSE_END + } + gg_append_statement(tgt->addresses.setup.go_to); + + // The next parser+_generated instructions will be the body of the loop, so we + // need a TOP label here so we can get back to them: + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("LABEL TOP:") + SHOW_PARSE_END + } + + // Give GDB-COBOL something to chew on when NEXTing. This instruction will + // get the line number of the PERFORM N TIMES code. + gg_append_statement(tgt->addresses.top.label); + gg_assign(var_decl_nop, build_int_cst_type(INT, 104)); + } + +void +parser_perform_conditional( struct cbl_perform_tgt_t *tgt ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" cbl_perform_tgt_t is at") + char ach[32]; + sprintf(ach, " %p", tgt); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + size_t i = tgt->addresses.number_of_conditionals; + + if( !(i < MAXIMUM_UNTILS) ) + { + cbl_internal_error("%s:%d: %zu exceeds MAXIMUM_UNTILS of %d, line %d", + __func__, __LINE__, i, MAXIMUM_UNTILS, CURRENT_LINE_NUMBER); + } + gcc_assert(i < MAXIMUM_UNTILS); + + // Create an unnamed goto/label pair for jumping over the conditional + // calculation. + gg_create_goto_pair(&tgt->addresses.condover[i].go_to, + &tgt->addresses.condover[i].label); + + // Create an unnamed goto/label pair for jumping into the + // conditional calculation: + gg_create_goto_pair(&tgt->addresses.condinto[i].go_to, + &tgt->addresses.condinto[i].label); + + // Create an unnamed goto/label pair for jumping back from the + // conditional calculation: + gg_create_goto_pair(&tgt->addresses.condback[i].go_to, + &tgt->addresses.condback[i].label); + + // The next instructions that the parser will give us are the conditional + // calculation, so the first thing that goes down is the condover: + gg_append_statement(tgt->addresses.condover[i].go_to); + + // And then, of course, we need to be able to jump back here to actually + // do the run-time conditional calculations: + gg_append_statement(tgt->addresses.condinto[i].label); + + tgt->addresses.number_of_conditionals += 1; + } + +void +parser_perform_conditional_end( struct cbl_perform_tgt_t *tgt ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" cbl_perform_tgt_t is at") + char ach[32]; + sprintf(ach, " %p", tgt); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + size_t i = tgt->addresses.number_of_conditionals; + gcc_assert(i); + + // We need to cap off the prior conditional in this chain of conditionals + gg_append_statement(tgt->addresses.condback[i-1].go_to); + gg_append_statement(tgt->addresses.condover[i-1].label); + } + +static void +build_N_pairs(tree *go_to, tree *label, size_t N) + { + for(size_t i=0; iaddresses.top.label); + + // Go do the conditional calculation: + + gg_append_statement(tgt->addresses.condinto[0].go_to); + + // And put down the label so that the conditional calculation knows + // where to return: + gg_append_statement(tgt->addresses.condback[0].label); + + char ach[256]; + size_t our_pseudo_label = pseudo_label++; + sprintf(ach, + "_proccallb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + + parser_if(varys[0].until); + { + // We're done, so leave + gg_append_statement(tgt->addresses.exit.go_to); + } + parser_else(); + { + // We're not done, so execute the body + // true means GDB next will fall through + internal_perform_through(tgt->from(), tgt->to(), true); + + // Jump back to the test: + gg_append_statement(tgt->addresses.top.go_to ); + } + parser_fi(); + + // Label the bottom of the PERFORM + gg_append_statement( tgt->addresses.exit.label ); + sprintf(ach, + "_procretb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + } + +static void +perform_outofline_after_until(struct cbl_perform_tgt_t *tgt, + bool /*test_before*/, + size_t /*N*/, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM proc-1 [through proc-2] TEST AFTER UNTIL + + /* + TOP: + EXECUTE BODY + IF CONDITION 0 + GOTO EXIT + ELSE + ADD BY_0 to VARYING_0 + GOTO TOP + EXIT: + */ + + char ach[256]; + size_t our_pseudo_label = pseudo_label++; + sprintf(ach, + "_proccallb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + + create_iline_address_pairs(tgt); + + // Label the top of the loop + gg_append_statement(tgt->addresses.top.label); + + // Build the perform: + // true in the next call means that GDB next will not stop until the entire + // until loop is finished + internal_perform_through(tgt->from(), tgt->to(), true); + + // Go recalculate the conditional: + gg_append_statement( tgt->addresses.condinto[0].go_to); + + // And lay down the label for the come-back from the recalculation: + gg_append_statement( tgt->addresses.condback[0].label); + + // Assess the conditional + parser_if(varys[0].until); + // It's true, so we're done + gg_append_statement( tgt->addresses.exit.go_to ); + parser_else(); + // It's false, so execute the body again + gg_append_statement( tgt->addresses.top.go_to ); + parser_fi(); + // Label the bottom of the PERFORM + gg_append_statement( tgt->addresses.exit.label ); + sprintf(ach, + "_procretb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + } + +static void +perform_outofline_testafter_varying(struct cbl_perform_tgt_t *tgt, + bool /*test_before*/, + size_t N, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM proc-1 [THROUGH proc-2] TEST AFTER VARYING + + /* + + [ENTRANCE] + MOVE FROM_0 TO VARYING_0 + INIT_1: + MOVE FROM_1 TO VARYING_1 + INIT_2: + MOVE FROM_2 TO VARYING_2 + . . . . . . . . . . . . . . . . . . + INIT_N-2: + MOVE FROM_N-2 TO VARYING_N-2 + INIT_N-1: + MOVE FROM_N-1 TO VARYING_N-1 + GOTO TOP + TOP: + PERFORM PROC-1 [THROUGH PROC-2] + IF NOT CONDITION_N-1 + ADD BY_N-1 TO VARYING_N-1 + GOTO TOP + IF NOT CONDITION_N-2 + ADD BY_N-2 TO VARYING_N-2 + GOTO INIT_N-1 + IF NOT CONDITION_N-3 + ADD BY_N-3 TO VARYING_N-3 + GOTO INIT_N-2 + . . . . . . . . . . . . . . . . . . + IF NOT CONDITION_1 + ADD BY_1 TO VARYING_1 + GOTO INIT_2 + IF NOT CONDITION_0 + ADD BY_0 TO VARYING_0 + GOTO INIT_1 + EXIT: + + */ + + // So, we're going to do that. But because the initializations + // and the testing are so nicely loopish, we're going to let + // the computer create them for us. + + // We are going to need a set of N label pairs. Actually, we + // only need N-1; we don't use the zeroth pair. But the code + // is cleaner if we just build all N of them. + + char ach[256]; + size_t our_pseudo_label = pseudo_label++; + sprintf(ach, + "_proccallb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + + create_iline_address_pairs(tgt); + + tree go_to[MAX_AFTERS]; + tree label[MAX_AFTERS]; + + build_N_pairs(go_to, label, N); + + // Build the initialization section: + for(size_t i=0; iaddresses.top.go_to); + gg_append_statement(tgt->addresses.top.label); + + // Build the body: + // true in the next call means that the entire loop will complete + // even in the face of a GDB next + internal_perform_through(tgt->from(), tgt->to(), true); + + // Build the test section + // (The oddball test is because N is a size_t, and can't go negative) + for(size_t i=N-1; iaddresses.condinto[i].go_to); + + // And put down the label for the return from that calculation: + gg_append_statement( tgt->addresses.condback[i].label); + + parser_if( varys[i].until ); + // Condition is true; so we'll fall through + parser_else(); + // Condition is false, so we increment, and keep going: + parser_add(varys[i].varying, varys[i].by, varys[i].varying); + if( i == N-1 ) + { + gg_append_statement(tgt->addresses.top.go_to); + } + else + { + gg_append_statement(go_to[i+1]); + } + parser_fi(); + } + // Arriving here means that we all of the conditions were + // true. So, we're done. + sprintf(ach, + "_procretb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + } + +static void +perform_outofline_before_varying( struct cbl_perform_tgt_t *tgt, + bool /*test_before*/, + size_t N, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM proc-1 [THROUGH proc-2] TEST BEFORE VARYING + + /* + + ENTRANCE: + SET ALL VARYING-N to FROM-N + TEST_0: + IF CONDITION_0: + GOTO EXIT: + TEST_1: + IF CONDITION_1: + ADD BY_0 TO VARYING_0 + MOVE FROM_1 TO VARYING_1 + GOTO TEST_0 + TEST_2: + IF CONDITION_2: + ADD BY_1 TO VARYING_1: + MOVE FROM_2 TO VARYING_2 + GOTO TEST_1: + TEST_3: + IF CONDITION_3: + ADD BY_2 TO VARYING_2: + MOVE FROM_3 TO VARYING_3 + GOTO TEST_1: + . . . . . . . . . . . . . . . . + TEST_N-1: + IF CONDITION_N-1: + ADD BY_N-2 TO VARYING_N-2: + MOVE FROM_N-2 TO VARYING_N-2 + GOTO TEST_N-2 + TOP: + PERFORM proc-1 [THROUGH proc-2] + + ADD BY_N-1 TO VARYING_N-1: + GOTO TEST_N-1 + + */ + create_iline_address_pairs(tgt); + + tree go_to[MAX_AFTERS]; + tree label[MAX_AFTERS]; + build_N_pairs(go_to, label, N); + + char ach[256]; + size_t our_pseudo_label = pseudo_label++; + sprintf(ach, + "_proccallb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + + // Initialize all varying: + + for(size_t i=0; iaddresses.condinto[i].go_to); + + // And put down the label that brings us back: + gg_append_statement(tgt->addresses.condback[i].label); + + // Now we can test the calculated conditional: + parser_if(varys[i].until); + // This condition has been met, so we increment the + // variable to the left, reset ours, and go check the + // one we just incremented + if(i == 0) + { + // This is the leftmost condition condition, so when it + // is TRUE, we are done. + gg_append_statement( tgt->addresses.exit.go_to ); + } + else + { + // This is one of the conditions to the right of the + // first one. So, we augment the VARYING to the + // left, reset our VARYING, and go test the + // condition to the left: + parser_add(varys[i-1].varying, varys[i-1].by, varys[i-1].varying); + parser_move(varys[i].varying, varys[i].from); + gg_append_statement( go_to[i-1] ); + } + parser_else(); + // This condition has not been met. + if( i == N-1 ) + { + // ... and this is the rightmost condition + // This is where we perform the body of the PERFORM. + gg_append_statement( tgt->addresses.top.label ); + + // Build the body: + // true in the next call means that GDB NEXT will pass through the + // entire loop + internal_perform_through(tgt->from(), tgt->to(), true); + + // And now we augment FROM_N-1 by BY__N-1 + parser_add(varys[N-1].varying, varys[N-1].by, varys[N-1].varying); + + // And we jump back to test that freshly-augmented condition + gg_append_statement( go_to[N-1] ); + } + else + { + // At this point, a condition that is not the rightmost + // one has not been met. We could, in principle, just + // fall through at this point. But that makes me nervous. + // So, I am going to put in what may well be an + // unnecessary goto: + gg_append_statement( go_to[i+1] ); + } + parser_fi(); + } + // The astute observer will have noted that there is no way + // for the generated runtime code to reach this point except by jumpint to + // the EXIT: label. + // We have, you see, reached the egress: + gg_append_statement( tgt->addresses.exit.label ); + sprintf(ach, + "_procretb.%ld:", + our_pseudo_label); + gg_insert_into_assembler( ach ); + } + +static void +perform_outofline( struct cbl_perform_tgt_t *tgt, + bool test_before, + size_t N, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is an out-of-line perform. + + // We need to create the address pairs, because there was no parser_perform_start + + if( N == 1 && !varys[0].varying.field ) + { + // There is no varys.varying, so this is just a PERFORM proc-1 UNTIL + if( test_before ) + { + perform_outofline_before_until(tgt, test_before, N, varys); + } + else + { + perform_outofline_after_until(tgt, test_before, N, varys); + } + } + else + { + // This is a PERFORM proc-1 [through proc-2] VARYING + if( test_before ) + { + perform_outofline_before_varying(tgt, test_before, N, varys); + } + else + { + perform_outofline_testafter_varying(tgt, test_before, N, varys); + } + } + } + +static void +perform_inline_until( struct cbl_perform_tgt_t *tgt, + bool test_before, + size_t /*N*/, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM [TEST {BEFORE|AFTER}] UNTIL + + /* + + GOTO SETUP + TOP: S1 + S2 + EXIT PERFORM -> GOTO EXIT: + S3 + S4 + EXIT PERFORM CYCLE -> GOTO TEST + S6 + S7 + TEST: IF CONDITION + GOTO EXIT + ELSE + GOTO TOP + SETUP: + IF TEST BEFORE + GOTO TEST + ELSE + GOTO TOP + EXIT: + */ + gg_set_current_line_number(cobol_location().last_line); + + gg_append_statement(tgt->addresses.test.label); + + // Go to where the conditional is recalculated.... + gg_append_statement(tgt->addresses.condinto[0].go_to); + + // ...and lay down the return address. + gg_append_statement(tgt->addresses.condback[0].label); + + parser_if( varys[0].until ); + gg_append_statement( tgt->addresses.exit.go_to ); + parser_else(); + gg_append_statement( tgt->addresses.top.go_to ); + parser_fi(); + gg_append_statement( tgt->addresses.setup.label ); + + if( test_before ) + { + gg_append_statement( tgt->addresses.test.go_to ); + } + else + { + gg_append_statement( tgt->addresses.top.go_to ); + } + gg_append_statement( tgt->addresses.exit.label ); + } + +static void +perform_inline_testbefore_varying( struct cbl_perform_tgt_t *tgt, + bool /*test_before*/, + size_t N, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM proc-1 [THROUGH proc-2] TEST BEFORE VARYING + + /* + + GOTO SETUP + TOP: + S1 + S2 + EXIT PERFORM -- GOTO EXIT: + S3 + S4 + EXIT PERFORM CYCLE -- GOTO TESTA + S5 + S6 + GOTO AUGMENT_N-1 + SETUP: + SET ALL VARYING-N to FROM-N + TEST_0: + IF CONDITION_0: + GOTO EXIT: + TEST_1: + IF CONDITION_1: + ADD BY_0 TO VARYING_0 + MOVE FROM_1 TO VARYING_1 + GOTO TEST_0 + TEST_2: + IF CONDITION_2: + ADD BY_1 TO VARYING_1: + MOVE FROM_2 TO VARYING_2 + GOTO TEST_1: + TEST_3: + IF CONDITION_3: + ADD BY_2 TO VARYING_2: + MOVE FROM_3 TO VARYING_3 + GOTO TEST_1: + . . . . . . . . . . . . . . . . + TEST_N-1: + IF CONDITION_N-1: + ADD BY_N-2 TO VARYING_N-2: + MOVE FROM_N-2 TO VARYING_N-2 + GOTO TEST_N-2 + + GOTO TOP + TESTA: + ADD BY_N-1 TO VARYING_N-1: + GOTO TEST_N-1 + + */ + tree go_to[MAX_AFTERS]; + tree label[MAX_AFTERS]; + build_N_pairs(go_to, label, N); + + // At this point in the executable, the body of the inline loop has been + // laid down, so we lay down a GOTO TESTA + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("GOTO TESTA") + SHOW_PARSE_END + } + gg_append_statement(tgt->addresses.testA.go_to); + + // It's now safe to setup the whole extravaganza of UNTIL conditions: + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("LABEL SETUP:") + SHOW_PARSE_END + } + gg_append_statement(tgt->addresses.setup.label); + + // Initialize all varying: + for(size_t i=0; iaddresses.condinto[i].go_to); + + // ...and lay down the label for the return from there + SHOW_PARSE + { + SHOW_PARSE_INDENT + char ach[32]; + sprintf(ach, "LABEL CONDBACK[%ld]:", i); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + gg_append_statement(tgt->addresses.condback[i].label); + + // Test that conditional + parser_if(varys[i].until); + // This condition has been met, so we increment the + // variable to the left, reset ours, and go check the + // one we just incremented + if(i == 0) + { + // This is the leftmost condition condition, so when it + // is TRUE, we are done. + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("GOTO EXIT") + SHOW_PARSE_END + } + gg_append_statement( tgt->addresses.exit.go_to ); + } + else + { + // This is one of the conditions to the right of the + // first one. So, we augment the VARYING to the + // left, reset our VARYING, and go test the + // condition to the left: + parser_add(varys[i-1].varying, varys[i-1].by, varys[i-1].varying); + parser_move(varys[i].varying, varys[i].from); + SHOW_PARSE + { + SHOW_PARSE_INDENT + char ach[32]; + sprintf(ach, "GOTO [%ld]:", i-1); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + gg_append_statement( go_to[i-1] ); + } + parser_else(); + // This condition has not been met. + if( i == N-1 ) + { + // ... and this is the rightmost condition + // This is where we perform the body of the PERFORM. + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("GOTO TOP") + SHOW_PARSE_END + } + gg_append_statement( tgt->addresses.top.go_to ); + + // And now we augment FROM_N-1 by BY__N-1 + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("LABEL TESTA:") + SHOW_PARSE_END + } + gg_append_statement(tgt->addresses.testA.label); + parser_add(varys[N-1].varying, varys[N-1].by, varys[N-1].varying); + // And we jump back to test that freshly-augmented condition + SHOW_PARSE + { + SHOW_PARSE_INDENT + char ach[32]; + sprintf(ach, "GOTO [%ld]:", N-1); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + gg_append_statement( go_to[N-1] ); + } + else + { + // At this point, a condition that is not the rightmost + // one has not been met. We could, in principle, just + // fall through at this point. But that makes me nervous. + // So, I am going to put in what may well be an + // unnecessary goto: + SHOW_PARSE + { + SHOW_PARSE_INDENT + char ach[32]; + sprintf(ach, "GOTO [%ld]:", i-1); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + gg_append_statement( go_to[i+1] ); + } + parser_fi(); + } + + // The astute observer will have noted that there is no way + // for the generated runtime code to reach this point. + // + // We have, you see, reached the egress: + gg_append_statement( tgt->addresses.exit.label ); + } + +static void +perform_inline_testafter_varying( struct cbl_perform_tgt_t *tgt, + bool /*test_before*/, + size_t N, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is a PERFORM TEST AFTER VARYING + + /* + + GOTO SETUP + TOP: + S1 + S2 + EXIT PERFORM -- GOTO EXIT: + S3 + S4 + EXIT PERFORM CYCLE -- GOTO TESTA + S5 + S6 + GOTO TESTA: + + SETUP: + MOVE FROM_0 TO VARYING_0 + INIT_1: + MOVE FROM_1 TO VARYING_1 + INIT_2: + MOVE FROM_2 TO VARYING_2 + . . . . . . . . . . . . . . . . . . + INIT_N-2: + MOVE FROM_N-2 TO VARYING_N-2 + INIT_N-1: + MOVE FROM_N-1 TO VARYING_N-1 + GOTO TOP + TESTA: + TEST_N-1: + IF NOT CONDITION_N-1 + ADD BY_N-1 TO VARYING_N-1 + GOTO TOP + IF NOT CONDITION_N-2 + ADD BY_N-2 TO VARYING_N-2 + GOTO INIT_N-1 + IF NOT CONDITION_N-3 + ADD BY_N-3 TO VARYING_N-3 + GOTO INIT_N-2 + . . . . . . . . . . . . . . . . . . + IF NOT CONDITION_1 + ADD BY_1 TO VARYING_1 + GOTO INIT_2 + IF NOT CONDITION_0 + ADD BY_0 TO VARYING_0 + GOTO INIT_1 + // At this point, all conditions are true + EXIT: + + */ + + // So, we're going to do that. But because the initializations + // and the testing are so nicely loopish, we're going to let + // the computer create them for us. + + // We are going to need a set of N label pairs. Actually, we + // only need N-1; we don't use the zeroth pair. But the code + // is cleaner if we just build all N of them. + + tree go_to[MAX_AFTERS]; + tree label[MAX_AFTERS]; + + build_N_pairs(go_to, label, N); + + // At this point the code being laid down, the GOTO SETUP was created, + // followed by the stream of statements. We terminate it with a + // goto testa + gg_append_statement(tgt->addresses.testA.go_to); + + // See the comment in create_iline_address_pairs() + //gg_force_line_number(tgt->addresses.line_number_of_setup_code-1); + + // That's followed by the SETUP target: + gg_append_statement(tgt->addresses.setup.label); + + // We now build the initialization section, + for(size_t i=0; iaddresses.top.go_to); + + // The list of statements ends with a goto TESTA, and that;s here: + gg_append_statement(tgt->addresses.testA.label); + + // Build the test section + // (The oddball test is because N is a size_t, and can't go negative) + for(size_t i=N-1; iaddresses.condinto[i].go_to); + + // ...and lay down the label to get back from there + gg_append_statement(tgt->addresses.condback[i].label); + + // Test the newly-recalculated conditional: + parser_if( varys[i].until ); + // Condition is true; so we'll fall through + parser_else(); + // Condition is false, so we increment, and keep going: + parser_add(varys[i].varying, varys[i].by, varys[i].varying); + if( i == N-1 ) + { + gg_append_statement(tgt->addresses.top.go_to); + } + else + { + gg_append_statement(go_to[i+1]); + } + parser_fi(); + } + + // Arriving here means that we all of the conditions were + // true. So, we're done. + gg_append_statement( tgt->addresses.exit.label ); + } + +static void +perform_inline_impl( struct cbl_perform_tgt_t *tgt, + bool test_before, + size_t N, + struct cbl_perform_vary_t *varys ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + if( N == 1 && !varys[0].varying.field ) + { + perform_inline_until(tgt, test_before, N, varys); + } + else + { + // This is a PERFORM proc-1 [through proc-2] VARYING + if( !test_before ) + { + perform_inline_testafter_varying(tgt, test_before, N, varys); + } + else + { + perform_inline_testbefore_varying(tgt, test_before, N, varys); + } + } + } + +void +parser_perform_until( struct cbl_perform_tgt_t *tgt, + bool test_before, + size_t N, + struct cbl_perform_vary_t *varys ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" cbl_perform_tgt_t is at") + char ach[32]; + sprintf(ach, " %p", tgt); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_LABEL(" ", tgt->from()) + if( tgt->to() ) + { + SHOW_PARSE_LABEL(" THROUGH", tgt->to()) + } + SHOW_PARSE_END + } + + gg_set_current_line_number(cobol_location().last_line); + gg_assign(var_decl_nop, build_int_cst_type(INT, 105)); + + if( tgt->from()->type != LblLoop ) + { + perform_outofline( tgt, test_before, N, varys); + } + else + { + perform_inline_impl( tgt, test_before, N, varys); + } + } + +void +parser_perform_inline_times(struct cbl_perform_tgt_t *tgt, + struct cbl_refer_t how_many ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_LABEL("", tgt->from()); + SHOW_PARSE_REF(" how_many is ", how_many); + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD(" into ", how_many.field, " times"); + TRACE1_END + } + + gcc_assert(tgt); + cbl_field_t *count = how_many.field; + if( how_many.is_reference() ) + { + cbl_internal_error("%s:%d: ignoring subscripts", __func__, __LINE__); + } + CHECK_FIELD(count); + + // This has to be on the stack, because performs can be nested + tree counter = gg_define_variable(LONG); + + /* + GOTO SETUP + TOP: S1 + EXIT PERFORM --> GOTO EXIT + S2 + EXIT PERFORM CYCLE --> GOTO TEST + S3 + TESTA: + TEST: INCREMENT COUNTER + IF COUNTER LT LIMIT + GOTO TOP + ELSE + GOTO EXIT + SETUP: INITIALIZE COUNTER + GOTO TOP + EXIT: + */ + + // At this point, the GOTO SETUP, the label "TOP:" and the + // body of the inline perform have been laid down. + + // Tack on the label for TEST and TESTA + gg_append_statement( tgt->addresses.testA.label ); + gg_append_statement( tgt->addresses.test.label ); + + // AT this point, we want to set the line_number to the location of the + // END-PERFORM statement. + gg_set_current_line_number(cobol_location().last_line); + + gg_decrement(counter); + // Do the test: + IF( counter, gt_op, gg_cast(LONG, integer_zero_node) ) + // We continue + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("If still counting GOTO TOP") + SHOW_PARSE_END + } + gg_append_statement( tgt->addresses.top.go_to ); + ELSE + // We are done + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("If count complete GOTO EXIT") + SHOW_PARSE_END + } + gg_append_statement( tgt->addresses.exit.go_to ); + ENDIF + + // Lay down the SETUP: label + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("LABEL SETUP:") + SHOW_PARSE_END + } + + int stash = gg_get_current_line_number(); + gg_set_current_line_number(tgt->addresses.line_number_of_setup_code); + gg_append_statement( tgt->addresses.setup.label ); + + // Get the count: + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("Access the how_many parameter") + SHOW_PARSE_REF(" ", how_many) + SHOW_PARSE_END + } + + get_binary_value( counter, + NULL, + count, + size_t_zero_node); + + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("GOTO TOP") + SHOW_PARSE_END + } + + // Make sure the initial count is valid: + IF( counter, gt_op, gg_cast(LONG, integer_zero_node) ) + gg_append_statement( tgt->addresses.top.go_to ); + ELSE + gg_append_statement( tgt->addresses.exit.go_to ); + ENDIF + + gg_set_current_line_number(stash); + + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("LABEL EXIT:") + SHOW_PARSE_END + } + gg_append_statement( tgt->addresses.exit.label ); + } + +void +parser_set_conditional88( struct cbl_refer_t refer, bool which_way ) + { + Analyze(); + struct cbl_field_t *tgt = refer.field; + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", tgt) + if( which_way ) + { + SHOW_PARSE_TEXT(" TRUE"); + } + else + { + SHOW_PARSE_TEXT(" FALSE"); + } + SHOW_PARSE_END + } + + CHECK_FIELD(tgt); + + struct cbl_field_t *parent = parent_of(tgt); + + CHECK_FIELD(parent); + + cbl_domain_t *src; + + if( which_way ) + { + src = tgt->data.domain; + } + else + { + src = tgt->data.false_value; + } + + // We want to set the LEVEL88 target to TRUE (or FALSE), so we need to set + // the parent of this LEVEL88 to the first element in data.domain (or + // data.false_value); + + cbl_figconst_t figconst = cbl_figconst_of(src->first.name()); + + if( !figconst ) + { + // We are dealing with an ordinary string. + static size_t buffer_size = 0; + static char *buffer = NULL; + size_t length = src->first.size(); + raw_to_internal(&buffer, &buffer_size, src->first.name(), length); + move_tree_to_field( parent, + gg_string_literal(buffer)); + } + else + { + // This is a figurative constant + gg_call(VOID, + "__gg__parser_set_conditional", + gg_get_address_of(parent->var_decl_node), + build_int_cst_type(INT, figconst), + NULL_TREE); + } + } + +static +void set_user_status(struct cbl_file_t *file) + { + // This routine sets the user_status, if any, to the cblc_file_t::status + if(file->user_status) + { + cbl_field_t *user_status = cbl_field_of(symbol_at(file->user_status)); + gcc_assert( user_status ); + gg_call(VOID, + "__gg__set_user_status", + gg_get_address_of(user_status->var_decl_node), + gg_get_address_of(file->var_decl_node), + NULL_TREE); + } + } + +void +parser_file_add(struct cbl_file_t *file) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( file ) + { + fprintf(stderr, " cbl_file_t: %s", file->name); + if( file->record_length ) + { + SHOW_PARSE_TEXT(" file->record_length is %s"); + SHOW_PARSE_TEXT(file->name); + } + else + { + SHOW_PARSE_TEXT(" file->record_length is ZERO") + } + } + else + { + SHOW_PARSE_TEXT( " *file pointer is NULL") + } + SHOW_PARSE_END + } + + if( !file ) + { + cbl_internal_error("%s(): called with NULL *file", __func__); + gcc_assert(file); + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("parser_file_add cbl_file_t ") + TRACE1_TEXT(file->name); + TRACE1_END + } + + /* The FD record can be flagged external. Without definitive information, I + am going to assume that the *everything* in the cblc_file_t structure is + GLOBAL EXTERNAL. If I have read the specification incorrectly, and it's + possible for two programs to share a file connector but with, say, two + different lists of keys, then the cblc_file_t structure will have to + be changed to have one var_decl node for the common information, and a + second one for local information. + + */ + + gg_variable_scope_t scope; + if( file->attr & external_e ) + { + scope = vs_external; + } + else + { + scope = vs_static; + } + + char achName[2*sizeof(cbl_name_t)]; + + // Use the global structure template declaration to produce the specific + // structure definition expression: + strcpy(achName, "_"); + strcat(achName, file->name); + strcat(achName, "_fc"); // For "File Connector" + tree new_var_decl = gg_define_variable( cblc_file_type_node, + achName, + scope); + + // We have to convert file->nkey and file->keys to the run-time formats. + + // There can be 0 through N keys, and each of those keys has M fields. Each of + // the M fields has a "unique" flag, which we pass along as an array of INTs. + + int number_of_key_fields = 0; + for( size_t i=0; inkey; i++ ) + { + number_of_key_fields += file->keys[i].nfield; + } + + // We create an array of pointers for those fields, adding an additional + // element for a NULL pointer to indicate the end of the list: + strcpy(achName, "_"); + strcat(achName, file->name); + strcat(achName, "_keys"); + tree array_of_keys = gg_define_variable( + build_pointer_type(cblc_field_p_type_node), + achName, + scope); + gg_assign(array_of_keys, + gg_cast(build_pointer_type(cblc_field_p_type_node), + gg_malloc(build_int_cst_type(SIZE_T, + (number_of_key_fields+1) + *sizeof(void *))))); + + strcpy(achName, "_"); + strcat(achName, file->name); + strcat(achName, "_keynum"); + tree key_numbers = gg_define_variable(build_pointer_type(INT), + achName, + scope); + gg_assign(key_numbers, + gg_cast(build_pointer_type(INT), + gg_malloc(build_int_cst_type(SIZE_T, + (number_of_key_fields+1) + *sizeof(int))))); + + strcpy(achName, "_"); + strcat(achName, file->name); + strcat(achName, "_uniqs"); + tree unique_flags = gg_define_variable( build_pointer_type(INT), + achName, + scope); + gg_assign(unique_flags, + gg_cast(build_pointer_type(INT), + gg_malloc(build_int_cst_type(SIZE_T, + (number_of_key_fields+1) + *sizeof(int))))); + + size_t index = 0; + for( size_t i=0; inkey; i++ ) + { + for( size_t j=0; jkeys[i].nfield; j++ ) + { + gg_assign(gg_array_value(array_of_keys, index), + get_field_p(file->keys[i].fields[j]) ); + + gg_assign(gg_array_value(key_numbers, index), + build_int_cst_type(INT, i+1)); + + gg_assign(gg_array_value(unique_flags, index), + (file->keys[i].unique ? integer_one_node : integer_zero_node)); + index += 1; + } + } + // Terminate the field list with a NULL: + gg_assign( gg_array_value(array_of_keys, index), gg_cast(cblc_field_p_type_node, null_pointer_node) ); + + // Terminate the key-numbers list with a negative 1 as a guardrail: + gg_assign( gg_array_value(key_numbers, index), integer_minusone_node ); + + // Terminate the uniques list with a zero, just to avoid garbage: + gg_assign( gg_array_value(unique_flags, index), integer_zero_node ); + + cbl_file_t::varying_t varies = symbol_file_record_sizes(file); + + gcc_assert(varies.min <= varies.max); + + if(file->access == file_inaccessible_e) + { + cbl_internal_error( + "%s:%d file %s access mode is 'file_inaccessible_e' in %s", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + file->name, + __func__); + } + + gg_call(VOID, + "__gg__file_init", + gg_get_address_of(new_var_decl), + gg_string_literal(file->name), + array_of_keys, + key_numbers, + unique_flags, + gg_get_address_of(symbol_file_record(file)->var_decl_node), + get_field_p(file->password), + get_field_p(file->user_status), + get_field_p(file->vsam_status), + get_field_p(file->record_length), + get_field_p(file_status_register()), + build_int_cst_type(SIZE_T, file->reserve), + build_int_cst_type(INT, (int)file->org), + build_int_cst_type(INT, (int)file->padding), + build_int_cst_type(INT, (int)file->access), + build_int_cst_type(INT, (int)file->optional), + build_int_cst_type(SIZE_T, varies.min), + build_int_cst_type(SIZE_T, varies.max), + NULL_TREE); + file->var_decl_node = new_var_decl; + } + +static void store_location_stuff(const cbl_name_t statement_name); + +void +parser_file_open( size_t nfiles, struct cbl_file_t *files[], int mode_char ) + { + for(size_t i=0; iname); + char ach[64]; + sprintf(ach, ", organization is %s", file_org_str(file->org)); + SHOW_PARSE_TEXT(ach); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL") + } + + SHOW_PARSE_TEXT(", mode_char: ") + char ach[2] = ""; + ach[0] = mode_char; + SHOW_PARSE_TEXT(ach) + + SHOW_PARSE_END + } + + if( !file ) + { + cbl_internal_error("parser_file_open called with NULL *file"); + } + + if( !file->var_decl_node ) + { + cbl_internal_error("parser_file_open for %s called with NULL var_decl_node", file->name); + } + + if( mode_char == 'a' && (file->access != file_access_seq_e) ) + { + cbl_internal_error("EXTEND can only be used where %s is ACCESS MODE SEQUENTIAL", file->name); + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("parser_file_open of ") + TRACE1_TEXT(file->name); + TRACE1_END + } + + // The cbl_file_t has a cbl_field_t *filename. This can be a FldAlphanumeric. + // The runtime has a (char *)filename, so we need to + // do a runtime conversion. + + tree psz; // This is going to be either the name of the file, or the + // possible run-time environment variable that will contain + // the name of the file. + + cbl_field_t *field_of_name = symbol_field_forward(file->filename); + bool quoted_name = false; + if( field_of_name->type == FldForward ) + { + // The target of ASSIGN TO was unquoted, but didn't resolve to a + // cbl_field_t. This means that the name of the field is an + // environment variable that will hold the file name + psz = gg_define_char_star(); + gg_assign(psz, gg_strdup(gg_string_literal(field_of_name->name))); + } + else + { + // The name is coming from a presumably FldAlphaNumeric variable + psz = get_string_from(field_of_name); + quoted_name = true; + } + + store_location_stuff("OPEN"); + gg_call(VOID, + "__gg__file_open", + gg_get_address_of(file->var_decl_node), + psz, + build_int_cst_type(INT, mode_char), + quoted_name ? integer_one_node : integer_zero_node, + NULL_TREE); + set_user_status(file); + } + +void +parser_file_close( struct cbl_file_t *file, file_close_how_t how ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL ") + } + SHOW_PARSE_END + } + + if( !file ) + { + cbl_internal_error("parser_file_close called with NULL *file"); + } + + if( !file->var_decl_node ) + { + cbl_internal_error("parser_file_close for %s called with NULL file->var_decl_node", file->name); + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("parser_file_close of ") + TRACE1_TEXT(file->name); + TRACE1_END + } + + // We are done with the filename. The library routine will free "filename" + // memory and set it back to null + + store_location_stuff("CLOSE"); + gg_call(VOID, + "__gg__file_close", + gg_get_address_of(file->var_decl_node), + build_int_cst_type(INT, (int)how), + NULL_TREE); + set_user_status(file); + } + +void +parser_file_read( struct cbl_file_t *file, + cbl_refer_t /*data_dest*/, + int where ) + { + Analyze(); + // where = -2 means PREVIOUS + // where = -1 means NEXT + // where = 1 or more means key N, where N is one-based + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL") + } + + char ach[32]; + sprintf(ach, " where:%d", where); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_END + } + + if( where == 0 ) + { + cbl_internal_error("%s:%d file %s 'where' is zero in %s", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + file->name, + __func__); + where = -1; + } + + if( !file ) + { + cbl_internal_error("parser_file_read called with NULL *file"); + } + + if( !file->var_decl_node ) + { + cbl_internal_error("parser_file_read for %s called with NULL file->var_decl_node", file->name); + } + + if( !file ) + { + cbl_internal_error("parser_file_read called with NULL *field"); + } + + if( !file->var_decl_node ) + { + cbl_internal_error("parser_file_read for %s called with NULL field->var_decl_node", file->name); + } + + if( file->access == file_access_seq_e && where >= 0) + { + cbl_internal_error("%s:%d file %s is RELATIVE/SEQUENTIAL, but 'where' >= 0", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + file->name); + where = -1; + } + + if( file->access == file_access_rnd_e && where < 0) + { + cbl_internal_error("%s:%d file %s is RELATIVE/RANDOM, but 'where' < 0", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + file->name); + where = 1; + } + + store_location_stuff("READ"); + gg_call(VOID, + "__gg__file_read", + gg_get_address_of(file->var_decl_node), + build_int_cst_type(INT, where), + NULL_TREE); + set_user_status(file); + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("from ") + TRACE1_TEXT(file->name); + TRACE1_INDENT + cbl_field_t *our_return_code + = cbl_field_of(symbol_at(file_status_register())); + TRACE1_FIELD("result: ", our_return_code, ""); + TRACE1_END + } + } + +void +parser_file_write( cbl_file_t *file, + cbl_field_t *record_area, + bool after, + cbl_refer_t &advance, + bool sequentially + ) + { + Analyze(); + + bool is_random = !( file->access == file_access_seq_e + || file->access == file_inaccessible_e); + + if( (is_random ? 1 : 0) != (sequentially ? 0 : 1) ) + { + cbl_internal_error("%s:%d file %s 'sequentially' is %d in %s", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + file->name, + sequentially ? 1 : 0, + __func__); + } + + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL") + } + + if( !advance.field ) + { + SHOW_PARSE_TEXT(" automatic BEFORE ADVANCING 1 LINE") + } + else + { + if( after ) + { + SHOW_PARSE_TEXT(" AFTER") + } + else + { + SHOW_PARSE_TEXT(" BEFORE") + } + SHOW_PARSE_REF(" ADVANCING ", advance); + SHOW_PARSE_TEXT(" LINE(S)") + } + + SHOW_PARSE_END + } + + if( !file ) + { + cbl_internal_error("%s(): called with NULL *file", __func__); + } + + if( !file->var_decl_node ) + { + cbl_internal_error("%s(): for %s called with NULL file->var_decl_node", + __func__, file->name); + } + + if( !file ) + { + cbl_internal_error("%s(): called with NULL *field", __func__); + } + + if( !file->var_decl_node ) + { + cbl_internal_error( "%s(): for %s called with NULL field->var_decl_node", + __func__, + file->name); + } + + static tree t_advance = gg_define_variable(INT, "..pfw_advance", vs_file_static); + if(advance.field) + { + static tree value = gg_define_variable(INT, "..pfw_value", vs_file_static); + get_binary_value( value, + NULL, + advance.field, + refer_offset_source(advance)); + gg_assign(t_advance, gg_cast(INT, value)); + } + else + { + if( file->org == file_line_sequential_e ) + { + // ISO/IEC_1989-2014 and IBM say the default is AFTER advancing + // MicroFocus and GnuCOBOL say the default is BEFORE advancing. + // See the comment where the variable is defined: + after = auto_advance_is_AFTER_advancing; + gg_assign(t_advance, integer_one_node); + } + else + { + // The default for SEQUENTIAL is no vertical motion + gg_assign(t_advance, integer_minusone_node); + } + } + + gcc_assert(record_area); + if( !record_area ) + { + record_area = cbl_field_of(symbol_at(file->default_record)); + } + + store_location_stuff("WRITE"); + gg_call(VOID, + "__gg__file_write", + gg_get_address_of(file->var_decl_node), + member(record_area, "data"), + member(record_area, "capacity"), + after ? integer_one_node : integer_zero_node, + t_advance, + is_random ? integer_one_node : integer_zero_node, + NULL_TREE); + set_user_status(file); + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("to ") + TRACE1_TEXT(file->name); + TRACE1_INDENT + if( advance.field ) + { + TRACE1_INDENT + if( after ) + { + TRACE1_TEXT("AFTER") + } + else + { + TRACE1_TEXT("BEFORE") + } + TRACE1_REFER(" ADVANCING ", advance, " LINE(S)"); + } + TRACE1_INDENT + cbl_field_t *our_return_code + = cbl_field_of(symbol_at(file_status_register())); + TRACE1_FIELD("result: ", our_return_code, ""); + TRACE1_END + } + } + +void +parser_file_delete( struct cbl_file_t *file, bool /*sequentially*/ ) + { + Analyze(); + bool sequentially = file->access == file_access_seq_e + || file->org == file_sequential_e + || file->org == file_line_sequential_e; + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + if( sequentially ) + { + SHOW_PARSE_TEXT(" sequentially") + } + else + { + SHOW_PARSE_TEXT(" sequentially") + } + } + else + { + SHOW_PARSE_TEXT(" *file is NULL") + } + SHOW_PARSE_END + } + + store_location_stuff("DELETE"); + gg_call(VOID, + "__gg__file_delete", + gg_get_address_of(file->var_decl_node), + sequentially ? integer_zero_node : integer_one_node, + NULL_TREE); + set_user_status(file); + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("parser_file_delete record ") + TRACE1_TEXT(file->name); + TRACE1_END + } + } + +void +parser_file_rewrite(cbl_file_t *file, + cbl_field_t *record_area, + bool sequentially ) + { + Analyze(); + if( file->org == file_indexed_e + && file->access == file_access_seq_e + && !sequentially ) + { + cbl_internal_error( + "%s:%d file %s is INDEXED/SEQUENTIAL, but 'sequentially' is false", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + file->name); + sequentially = true; + } + + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL") + } + SHOW_PARSE_END + } + + gcc_assert(record_area); + if( !record_area ) + { + record_area = cbl_field_of(symbol_at(file->default_record)); + } + + store_location_stuff("REWRITE"); + gg_call(VOID, + "__gg__file_rewrite", + gg_get_address_of(file->var_decl_node), + member(record_area, "capacity"), + sequentially ? integer_zero_node : integer_one_node, + NULL_TREE); + set_user_status(file); + } + +/* + * flk is first-last-key. Similar to parser_file_read, it is a + * 1-based index, for consistency. Encoded values: + * -1 FIRST + * -2 LAST + * 0 neither + * >0 1-based index into cbl_file_t::keys + */ +void +parser_file_start(struct cbl_file_t *file, + relop_t op, + int flk, + cbl_refer_t length_ref ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + switch(op) + { + case lt_op: + SHOW_PARSE_TEXT(" lt_op") + break; + case le_op: + SHOW_PARSE_TEXT(" le_op") + break; + case eq_op: + SHOW_PARSE_TEXT(" eq_op") + break; + case ne_op: + SHOW_PARSE_TEXT(" ne_op") + break; + case ge_op: + SHOW_PARSE_TEXT(" ge_op") + break; + case gt_op: + SHOW_PARSE_TEXT(" gt_op") + break; + } + char ach[32]; + sprintf(ach, " first-last-key:%d", flk); + SHOW_PARSE_TEXT(ach) + SHOW_PARSE_REF(" length:", length_ref); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL") + } + SHOW_PARSE_END + } + + if( flk == 0 + && (file->org == file_indexed_e || file->org == file_relative_e) ) + { + flk = 1; + op = eq_op; + } + + if( flk == 0 + && (file->org == file_sequential_e) ) + { + flk = -1; + } + + static tree length = gg_define_variable(SIZE_T, "..pfs_length", vs_file_static); + gg_assign(length, size_t_zero_node); + + if( flk > 0 && !length_ref.field ) + { + // We need a length, and we don't have one. We have to calculate the length + // from the lengths of the fields that make up the specified key. + + size_t combined_length = 0; + + gcc_assert(flk <= (int)file->nkey); + + int key_number = flk-1; + + // A key has a number of fields + for(size_t ifield=0; ifieldkeys[key_number].nfield; ifield++) + { + size_t field_index = file->keys[key_number].fields[ifield]; + cbl_field_t *field = cbl_field_of(symbol_at(field_index)); + combined_length += field->data.capacity; + } + gg_assign(length, build_int_cst_type(SIZE_T, combined_length)); + } + else if( flk > 0 ) + { + get_binary_value( length, + NULL, + length_ref.field, + refer_offset_dest(length_ref)); + } + + store_location_stuff("START"); + gg_call(VOID, + "__gg__file_start", + gg_get_address_of(file->var_decl_node), + build_int_cst_type(INT, op), + build_int_cst_type(INT, flk), + length, + NULL_TREE ); + set_user_status(file); + } + +static void +inspect_tally(bool backward, + cbl_refer_t identifier_1, + unsigned long n_identifier_2, + cbx_inspect_t* identifier_2) + { + Analyze(); + // This is an INSPECT FORMAT 1 + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + // Make one pass through the inputs to count up the sizes of the arrays + // we will be passing to the library routines. This loop structure simply + // anticipates the more complex one that follows. + + size_t int_index = 0; + size_t pcbl_index = 0; + + // The first integer is the all-important controlling count: + int_index++; + + // The first refer is for identifier-1 + pcbl_index++; + + for( size_t i=0; i* operations) + { + Analyze(); + // This is an INSPECT FORMAT 2 + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + } + + // For REPLACING, unlike TALLY, there can be but one operation + gcc_assert(n_ops == 1); + + size_t n_id_3 = 0; + size_t n_id_4 = 0; + size_t n_id_5 = 0; + size_t n_all_leading_first = 0; + + // Make one pass through the inputs to count up the sizes of the arrays + // we will be passing to the library routines: + + for( size_t j=0; jtype == FldLiteralN ) + { + fprintf(stderr, "INSPECT field %s shouldn't be a FldLiteralN\n", + pcbl_refers[i].field->name); + gcc_unreachable(); + } + } + + build_array_of_treeplets(1, pcbl_index, pcbl_refers); + + // Do the actual call: + gg_call(VOID, + "__gg__inspect_format_2", + backward ? integer_one_node : integer_zero_node, + integers, + NULL_TREE); + } + +void +parser_inspect(cbl_refer_t identifier_1, + bool backward, + unsigned long n_operations, + cbx_inspect_t* operations) + { + Analyze(); + gcc_assert(n_operations); + + /* Operating philosophy: We are going to minimize the amount of + GENERIC tag creation here at compile time, mainly by eliminating + the generation of cbl_resolved_t structures that we know + contain no information. */ + + if( operations[0].tally.field ) + { + // This is a FORMAT 1 "TALLYING" + inspect_tally(backward, identifier_1, n_operations, operations); + } + else + { + // This is a FORMAT 2 "REPLACING" + inspect_replacing(backward, identifier_1, n_operations, operations); + } + } + +void +parser_inspect_conv(cbl_refer_t input, + bool backward, + cbl_refer_t original, + cbl_refer_t replacement, + cbl_inspect_qual_t before, + cbl_inspect_qual_t after ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + gg_call(CHAR_P, + "__gg__inspect_format_4", + backward ? integer_one_node : integer_zero_node, + input.field ? gg_get_address_of(input.field->var_decl_node) + : null_pointer_node, + refer_offset_source(input), + refer_size_source(input), + original.field ? gg_get_address_of(original.field->var_decl_node) + : null_pointer_node, + refer_offset_dest(original), + refer_size_dest(original), + replacement.field ? gg_get_address_of( + replacement.field->var_decl_node) + : null_pointer_node, + refer_offset_source(replacement), + replacement.all ? build_int_cst_type(SIZE_T, -1LL) + : refer_size_source(replacement), + after.identifier_4.field ? gg_get_address_of( + after.identifier_4.field->var_decl_node) + : null_pointer_node, + refer_offset_source(after.identifier_4), + refer_size_source(after.identifier_4), + before.identifier_4.field ? gg_get_address_of( + before.identifier_4.field->var_decl_node) + : null_pointer_node, + refer_offset_source(before.identifier_4), + refer_size_source(before.identifier_4), + NULL_TREE + ); + } + +void +parser_module_name( cbl_field_t *tgt, module_type_t type ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + gg_call(VOID, + "__gg__module_name", + gg_get_address_of(tgt->var_decl_node), + build_int_cst_type(INT, type), + NULL_TREE); + } + +void +parser_intrinsic_numval_c( cbl_field_t *f, + cbl_refer_t& input, + bool locale, + cbl_refer_t& currency, + bool anycase, + bool test_numval_c ) // true for TEST-NUMVAL-C + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + if( locale || anycase ) + { + gcc_unreachable(); + } + if( test_numval_c ) + { + gg_call(INT, + "__gg__test_numval_c", + gg_get_address_of(f->var_decl_node), + gg_get_address_of(input.field->var_decl_node), + refer_offset_source(input), + refer_size_source(input), + currency.field ? gg_get_address_of(currency.field->var_decl_node) : null_pointer_node, + refer_offset_source(currency), + refer_size_source(currency), + NULL_TREE + ); + } + else + { + gg_call(INT, + "__gg__numval_c", + gg_get_address_of(f->var_decl_node), + gg_get_address_of(input.field->var_decl_node), + refer_offset_source(input), + refer_size_source(input), + currency.field ? gg_get_address_of(currency.field->var_decl_node) : null_pointer_node, + refer_offset_source(currency), + refer_size_source(currency), + NULL_TREE + ); + } + } + +void +parser_intrinsic_subst( cbl_field_t *f, + cbl_refer_t& ref1, + size_t argc, + cbl_substitute_t * argv ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + store_location_stuff("SUBSTITUTE"); + unsigned char *control_bytes = (unsigned char *)xmalloc(argc * sizeof(unsigned char)); + cbl_refer_t *arg1 = (cbl_refer_t *)xmalloc(argc * sizeof(cbl_refer_t)); + cbl_refer_t *arg2 = (cbl_refer_t *)xmalloc(argc * sizeof(cbl_refer_t)); + + for(size_t i=0; ivar_decl_node), + gg_get_address_of(ref1.field->var_decl_node), + refer_offset_source(ref1), + refer_size_source(ref1), + build_int_cst_type(SIZE_T, argc), + control, + NULL_TREE); + + gg_free(control); + + free(arg2); + free(arg1); + free(control_bytes); + } + +void +parser_intrinsic_callv( cbl_field_t *tgt, + const char function_name[], + size_t nrefs, + cbl_refer_t *refs ) + { + Analyze(); + // We have been given an array of refs[nrefs]. Each ref is a pointer + // to a cbl_ref_t. We convert that to a table of pointers to run-time + // cblc_ref_t structures, and we pass that to the function_name intrinsic + // function. It is in charge of conversion to whatever form is needed. + + // We get back a return value, which we convert to tgt based on the + // intrinsic_return_type + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" of ") + SHOW_PARSE_TEXT(function_name) + fprintf(stderr, " with %zd parameters", nrefs); + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + for(size_t i=0; ivar_decl_node), + ncount, + NULL_TREE); + + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_intrinsic_call_0(cbl_field_t *tgt, + const char function_name[]) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" of ") + SHOW_PARSE_TEXT(function_name) + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + } + + if( strcmp(function_name, "__gg__random") == 0 ) + { + // We have no seed value, so call the "next" routine + gg_call(VOID, + "__gg__random_next", + gg_get_address_of(tgt->var_decl_node), + NULL_TREE); + } + else if( strcmp(function_name, "__gg__when_compiled") == 0 ) + { + // Pass __gg__when_compiled() the time from right now. + struct timespec tp; + clock_gettime(CLOCK_REALTIME, &tp); // time_t tv_sec; long tv_nsec + + store_location_stuff(function_name); + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + build_int_cst(SIZE_T, tp.tv_sec), + build_int_cst(LONG, tp.tv_nsec), + NULL_TREE); + } + else + { + store_location_stuff(function_name); + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + NULL_TREE); + } + + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_intrinsic_call_1( cbl_field_t *tgt, + const char function_name[], + cbl_refer_t& ref1 ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" of ") + SHOW_PARSE_TEXT(function_name) + SHOW_PARSE_END + } + + // There are special cases: + if( strstr(function_name, "__gg__length") ) + { + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + TRACE1_INDENT + TRACE1_REFER("parameter: ", ref1, "") + } + size_t upper = ref1.field->occurs.bounds.upper + ? ref1.field->occurs.bounds.upper : 1; + if( ref1.nsubscript ) + { + upper = 1; + } + + if( is_table(ref1.field) && !ref1.nsubscript ) + { + static tree depending_on = gg_define_variable(LONG, "..pic1_dep"); + gg_get_depending_on_value(depending_on, ref1.field); + gg_call(VOID, + "__gg__int128_to_field", + gg_get_address_of(tgt->var_decl_node), + gg_cast(INT128, + gg_multiply(refer_size_source(ref1), + depending_on)), + integer_zero_node, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + } + else + { + if( upper == 1 ) + { + gg_call(VOID, + "__gg__int128_to_field", + gg_get_address_of(tgt->var_decl_node), + gg_cast(INT128, + refer_size_source(ref1)), + integer_zero_node, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + } + else + { + gg_call(VOID, + "__gg__int128_to_field", + gg_get_address_of(tgt->var_decl_node), + gg_cast(INT128, + gg_multiply(refer_size_source(ref1), + build_int_cst_type(SIZE_T, upper))), + integer_zero_node, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + } + } + } + else + { + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + TRACE1_INDENT + TRACE1_REFER("parameter: ", ref1, "") + } + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + gg_get_address_of(ref1.field->var_decl_node), + refer_offset_source(ref1), + refer_size_source(ref1), + NULL_TREE); + } + + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_intrinsic_call_2( cbl_field_t *tgt, + const char function_name[], + cbl_refer_t& ref1, + cbl_refer_t& ref2 ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" of ") + SHOW_PARSE_TEXT(function_name) + SHOW_PARSE_END + } + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + TRACE1_INDENT + TRACE1_REFER("parameter 1: ", ref1, "") + TRACE1_INDENT + TRACE1_REFER("parameter 2: ", ref2, "") + } + store_location_stuff(function_name); + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + gg_get_address_of(ref1.field->var_decl_node), + refer_offset_source(ref1), + refer_size_source(ref1), + ref2.field ? gg_get_address_of(ref2.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref2), + refer_size_source(ref2), + NULL_TREE); + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_intrinsic_call_3( cbl_field_t *tgt, + const char function_name[], + cbl_refer_t& ref1, + cbl_refer_t& ref2, + cbl_refer_t& ref3 ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" of ") + SHOW_PARSE_TEXT(function_name) + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + TRACE1_INDENT + TRACE1_REFER("parameter 1: ", ref1, "") + TRACE1_INDENT + TRACE1_REFER("parameter 2: ", ref2, "") + TRACE1_INDENT + TRACE1_REFER("parameter 3: ", ref3, "") + } + + store_location_stuff(function_name); + + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + ref1.field ? gg_get_address_of(ref1.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref1), + refer_size_source(ref1), + ref2.field ? gg_get_address_of(ref2.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref2), + refer_size_source(ref2), + ref3.field ? gg_get_address_of(ref3.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref3), + refer_size_source(ref3), + NULL_TREE); + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +void +parser_intrinsic_call_4( cbl_field_t *tgt, + const char function_name[], + cbl_refer_t& ref1, + cbl_refer_t& ref2, + cbl_refer_t& ref3, + cbl_refer_t& ref4 ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" of ") + SHOW_PARSE_TEXT(function_name) + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("about to call \"") + TRACE1_TEXT(function_name) + TRACE1_TEXT("\"") + TRACE1_INDENT + TRACE1_REFER("parameter 1: ", ref1, "") + TRACE1_INDENT + TRACE1_REFER("parameter 2: ", ref2, "") + TRACE1_INDENT + TRACE1_REFER("parameter 3: ", ref3, "") + TRACE1_INDENT + TRACE1_REFER("parameter 4: ", ref4, "") + } + store_location_stuff(function_name); + + gg_call(VOID, + function_name, + gg_get_address_of(tgt->var_decl_node), + ref1.field ? gg_get_address_of(ref1.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref1), + refer_size_source(ref1), + ref2.field ? gg_get_address_of(ref2.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref2), + refer_size_source(ref2), + ref3.field ? gg_get_address_of(ref3.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref3), + refer_size_source(ref3), + ref4.field ? gg_get_address_of(ref4.field->var_decl_node) : null_pointer_node, + refer_offset_source(ref4), + refer_size_source(ref4), + NULL_TREE); + TRACE1 + { + TRACE1_INDENT + TRACE1_FIELD("result: ", tgt, "") + TRACE1_END + } + } + +static void +field_increment(cbl_field_t *fld) + { + static tree value = gg_define_variable(INT128, "..fi_value", vs_file_static); + static tree rdigits = gg_define_variable(INT, "..fi_rdigits", vs_file_static); + get_binary_value(value, rdigits, fld, size_t_zero_node); + gg_assign( value, + gg_add(value, gg_cast(SIZE_T, integer_one_node))); + gg_call(VOID, + "__gg__int128_to_field", + gg_get_address_of(fld->var_decl_node), + value, + rdigits, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + } + +static void +create_lsearch_address_pairs(struct cbl_label_t *name) + { + // Create the lsearch structure + name->structs.lsearch = (cbl_lsearch_t *)xmalloc(sizeof(cbl_lsearch_t)); + cbl_lsearch_t *lsearch = name->structs.lsearch; + + gg_create_goto_pair(&lsearch->addresses.at_exit.go_to, + &lsearch->addresses.at_exit.label); + + gg_create_goto_pair(&lsearch->addresses.top.go_to, + &lsearch->addresses.top.label); + + gg_create_goto_pair(&lsearch->addresses.bottom.go_to, + &lsearch->addresses.bottom.label); + } + +void +parser_next_sentence() + { + // Eventually we'll need this. + } + +void +parser_lsearch_start( cbl_label_t *name, + cbl_field_t *table, + cbl_field_t *index, + cbl_field_t *varying ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + if( table ) + { + SHOW_PARSE_TEXT(" linear search of ") + SHOW_PARSE_TEXT(table->name) + } + if( index ) + { + SHOW_PARSE_TEXT(" index is ") + SHOW_PARSE_TEXT(index->name) + } + if( varying ) + { + SHOW_PARSE_TEXT(" varying ") + SHOW_PARSE_TEXT(varying->name) + } + SHOW_PARSE_END + } + // Create the goto/label pairs we are going to be needing: + create_lsearch_address_pairs(name); + cbl_lsearch_t *lsearch = name->structs.lsearch; + lsearch->first_when = true; + + // We need to find the first table element: + cbl_field_t *current = table; + while(current) + { + if( is_table(current) ) + { + // Extract the number of elements in that rightmost dimension. + lsearch->limit = gg_define_variable(LONG); + gg_get_depending_on_value(lsearch->limit, current); + break; + } + current = parent_of(current); + } + + // Establish the initial value of our counter: + lsearch->counter = gg_define_variable(LONG); + + tree value = gg_define_int128(); + if(varying) + { + get_binary_value(value, NULL, varying, size_t_zero_node); + } + else if( index ) + { + get_binary_value(value, NULL, index, size_t_zero_node); + } + gg_assign(lsearch->counter, gg_cast(LONG, value)); + + // And we need these around, so we can increment them: + lsearch->index = index; + lsearch->varying = varying; + + // From here we have to jump to the top of the loop: + gg_append_statement(lsearch->addresses.top.go_to); + + // The next next instructions will be the body of the at-exit code, so + // we need a label here so that we can get back to them + gg_append_statement(lsearch->addresses.at_exit.label); + } + +void +parser_lsearch_conditional(cbl_label_t * name) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + SHOW_PARSE_END + } + cbl_lsearch_t *lsearch = name->structs.lsearch; + + if( lsearch->first_when ) + { + lsearch->first_when = false; + // We are the first of the WHEN CONDITIONALs, which means we just laid down the final + // statement of the AT-EXIT imperative statements, which means it's + // time to leave the SEARCH completely. + gg_append_statement(lsearch->addresses.bottom.go_to); + + // And that puts us at the top of the loop: + gg_append_statement(lsearch->addresses.top.label); + + // It is at this point we check to see if we have reached the limit: + IF( lsearch->counter, gt_op, lsearch->limit ) + // The counter has run out. + gg_append_statement(lsearch->addresses.at_exit.go_to); + ELSE + // Just fall through into the following statements, which are + // the statements for the conditional for the first WHEN + ENDIF + } + else + { + // We are at the end of a WHEN TRUE imperative statement. + gg_append_statement(lsearch->addresses.bottom.go_to); + + // This is the second or later search_conditional. Note that the + // code generated here executes after the first parser_when call, so + // the jump_over label is ready to be placed. + + // We have to lay down the unnamed label so the prior WHEN can jump past + // its imperative statements when its condition is not met: + gg_append_statement(lsearch->jump_over.label); + } + // At this point, the parser starts laying down the statements that make + // up the next conditional. + } + +void +parser_lsearch_when( cbl_label_t *name, cbl_field_t *conditional ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + SHOW_PARSE_END + } + cbl_lsearch_t *lsearch = name->structs.lsearch; + + // Arriving here means that all of the conditional statements have been + // laid down, and we are ready to do the WHEN test: + + parser_if(conditional); + // We have found what we were looking for. Fall through to the next + // set of instructions, which comprise the imperative statement + // associated with the WHEN condition. + ELSE + // The conditional is false. We thus want to skip over the imperative + // instructions that are about to be laid down. + + // Create an unnamed goto/label pair: + gg_create_goto_pair(&lsearch->jump_over.go_to, + &lsearch->jump_over.label); + + // And lay down the goto. + gg_append_statement(lsearch->jump_over.go_to); + ENDIF + } + +void +parser_lsearch_end( cbl_label_t *name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + SHOW_PARSE_END + } + cbl_lsearch_t *lsearch = name->structs.lsearch; + + // Arriving here means we have just laid down the final imperative + // statements of the final WHEN. If these statements have been executing, + // it's now time to leave the SEARCH: + gg_append_statement(lsearch->addresses.bottom.go_to); + + // It's time to lay down the last jump_over label: + gg_append_statement(lsearch->jump_over.label); + + // With that in place, we increment stuff: + gg_assign(lsearch->counter, gg_add(lsearch->counter, gg_cast(LONG, integer_one_node))); + field_increment(lsearch->index); + + if( lsearch->varying ) + { + field_increment(lsearch->varying); + } + // From here we jump to the top of the loop: + gg_append_statement(lsearch->addresses.top.go_to); + + // And that means we now lay down the label for the bottom + gg_append_statement(lsearch->addresses.bottom.label); + + // At this point, we are done with the lsearch structure + free(lsearch); + lsearch = NULL; + } + +void +parser_bsearch_start( cbl_label_t* name, + cbl_field_t *table ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + if( table ) + { + SHOW_PARSE_TEXT(" binary search of ") + SHOW_PARSE_TEXT(table->name) + } + SHOW_PARSE_END + } + + // We need a cbl_bsearch_t structure: + name->structs.bsearch = (cbl_bsearch_t *)xmalloc(sizeof(cbl_bsearch_t)); + cbl_bsearch_t *bsearch = name->structs.bsearch; + + // Create the address/label pairs we need + gg_create_goto_pair(&bsearch->too_small.go_to, + &bsearch->too_small.label); + + gg_create_goto_pair(&bsearch->too_big.go_to, + &bsearch->too_big.label); + + gg_create_goto_pair(&bsearch->top.go_to, + &bsearch->top.label); + + gg_create_goto_pair(&bsearch->first_test.go_to, + &bsearch->first_test.label); + + gg_create_goto_pair(&bsearch->bottom.go_to, + &bsearch->bottom.label); + + // The logic when we first hit a WHEN needs to be different: + bsearch->first_when = true; + + // We need to find our table element: + cbl_field_t *current = table; + while(current) + { + if( is_table(current) ) + { + break; + } + current = parent_of(current); + } + + // There are a number of things we learn from the field "current" + + // We get the index: + gcc_assert(current->occurs.indexes.nfield); + size_t index_index = current->occurs.indexes.fields[0]; + bsearch->index = cbl_field_of( symbol_at(index_index) ); + gcc_assert(bsearch->index); + + // And we get the rightward bound of the number of elements: + // Not that these are LONGS, not SIZE_T. If we are searching for something + // that is smaller than element[0] of the table, then right ends up being + // -1, so we have to have a signed type. + bsearch->left = gg_define_variable(LONG, "_left"); + bsearch->right = gg_define_variable(LONG, "_right"); + bsearch->middle = gg_define_variable(LONG, "_middle"); + + // Assign the left and right values: + gg_assign(bsearch->left, build_int_cst_type(LONG, 1)); + gg_get_depending_on_value(bsearch->right, current); + + // Create the variable that will take the compare result. + bsearch->compare_result = gg_define_int(); + + // We now jump to the top of the binary testing loop, which comes right + // after the labels where we handle non-equal cases: + gg_append_statement(bsearch->top.go_to); + + gg_append_statement(bsearch->too_small.label); + // Arrive here when the element in the array is smaller than the one we are + // looking for. This means that we move bsearch->left to the right: + gg_assign(bsearch->left, gg_add(bsearch->middle, build_int_cst_type(LONG, 1))); + gg_append_statement(bsearch->top.go_to); + + gg_append_statement(bsearch->too_big.label); + // Arrive here when the element in the array is larger than the one we + // are looking for. This means we have to move bsearch->right to the left: + gg_assign(bsearch->right, gg_subtract(bsearch->middle, build_int_cst_type(LONG, 1))); + // Fall through to TOP: + + gg_append_statement(bsearch->top.label); + // Arrive here when it is time to check to see if we are done: + IF( bsearch->left, le_op, bsearch->right ) + // We are not done. Calculate middle from 'left' and 'right' + gg_assign( bsearch->middle, + gg_add(bsearch->left, bsearch->right) ); + gg_assign( bsearch->middle, + gg_divide(bsearch->middle, build_int_cst_type(LONG, 2) )); + //gg_printf("BSEARCH At the top %ld %ld %ld\n", bsearch->left, bsearch->middle, bsearch->right, NULL_TREE); + // We need to assign that value to bsearch->index. It might be possible + // to assume that bsearch->index is a size_t and just cram the bytes into + // place at bsearch->index->var_decl_node->data. But for now we'll + // be cautious and use the slower, but more assured, method: + + gg_call(VOID, + "__gg__int128_to_field", + gg_get_address_of(bsearch->index->var_decl_node), + gg_cast(INT128, bsearch->middle), + integer_zero_node, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + // And with middle/index established, we go do the WHEN clause: + gg_append_statement(bsearch->first_test.go_to); + ELSE + // The search ended without finding anything. Fall through to the + // AT-EXIT imperative statements that the parser will lay down right + // after the call to parser_bsearch_start(). + ENDIF + } + +void +parser_bsearch_conditional( cbl_label_t* name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + SHOW_PARSE_END + } + cbl_bsearch_t *bsearch = name->structs.bsearch; + + if( bsearch->first_when ) + { + bsearch->first_when = false; + // The first time we arrive here is after the WHEN part of the SEARCH ALL + // statement. We have just finished executing any AT-END statements there + // might be, so it's time to jump to the bottom: + gg_append_statement(bsearch->bottom.go_to); + + // Otherwise, the TOP part of the loop just calculated the next middle/index, + // and we now start processing it + + gg_append_statement(bsearch->first_test.label); + } + // The second parser_bsearch_conditional() is caused by the appearance of + // any subsequent AND clauses. And, it turns out, we do nothing. + + // The parser lays down the statements that calculate the conditional, + // and we just wait for parser_bsearch_when() + } + +bool +is_ascending_key(cbl_refer_t key) + { + bool retval = true; + + cbl_field_t *family_tree = key.field; + gcc_assert(family_tree); + while( family_tree ) + { + if( family_tree->occurs.nkey ) + { + break; + } + family_tree = parent_of(family_tree); + } + gcc_assert(family_tree->occurs.nkey); + for(size_t i=0; ioccurs.nkey; i++) + { + for(size_t j=0; joccurs.keys[i].field_list.nfield; j++) + { + size_t index_of_field + = family_tree->occurs.keys[i].field_list.fields[j]; + cbl_field_t *key_field = cbl_field_of(symbol_at(index_of_field)); + + if( strcmp( key_field->name, + key.field->name ) == 0 ) + { + retval = family_tree->occurs.keys[i].ascending; + goto done; + } + } + } + +done: + return retval; + } + +void +parser_bsearch_when(cbl_label_t* name, + cbl_refer_t key, + cbl_refer_t sarg, + bool ascending) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + SHOW_PARSE_END + } + cbl_bsearch_t *bsearch = name->structs.bsearch; + + if( ascending ) + { + cobol_compare( bsearch->compare_result, + key, + sarg ); + } + else + { + cobol_compare( bsearch->compare_result, + sarg, + key ); + } + + IF( bsearch->compare_result, lt_op, integer_zero_node ) + // The key is smaller than sarg: + gg_append_statement(bsearch->too_small.go_to); + ELSE + ENDIF + IF( bsearch->compare_result, gt_op, integer_zero_node ) + // The key is larger than sarg: + gg_append_statement(bsearch->too_big.go_to); + ELSE + ENDIF + + // We are at the Goldilocks point. The clause has been satisfied with + // an equality, so we will just fall through to the next set of statements + // that the parser laid down. They are either the next conditional, or + // the final imperative statements that get executed when all the + // clauses are satisfied. + } + +void +parser_bsearch_end( cbl_label_t* name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( name ) + { + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + } + SHOW_PARSE_END + } + cbl_bsearch_t *bsearch = name->structs.bsearch; + + // Arriving here means that either the search ran out without finding + // anything, (see the test up at TOP:), or else we just fell through from + // the statements that executed after all the WHEN/AFTER clauses were + // satisifed by equality (meaning there were no jumps to TOO_SMALL: or + // TOO_LARGE). In other words: we're done. + gg_append_statement(bsearch->bottom.label); + + free(bsearch); + } + +tree +gg_array_of_field_pointers( size_t N, + cbl_field_t **fields ) + { + tree retval = gg_define_variable(build_pointer_type(cblc_field_p_type_node)); + gg_assign(retval, gg_cast(build_pointer_type(cblc_field_p_type_node), gg_malloc( build_int_cst_type(SIZE_T, N * sizeof(void *))))); + for(size_t i=0; ivar_decl_node)); + } + return retval; + } + +static void +push_program_state() + { + gg_call(VOID, + "__gg__push_program_state", + NULL_TREE); + } + +static void +pop_program_state() + { + gg_call(VOID, + "__gg__pop_program_state", + NULL_TREE); + } + +void +parser_sort(cbl_refer_t tableref, + bool duplicates, + cbl_alphabet_t *alphabet, + size_t nkeys, + cbl_key_t *keys ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( tableref.field ) + { + SHOW_PARSE_REF(" Sort table: ", tableref) + } + SHOW_PARSE_END + } + + cbl_field_t *table = tableref.field; + gcc_assert(table); + gcc_assert(table->var_decl_node); + if( !is_table(table) ) + { + cbl_internal_error( "%s(): asked to sort %s, but it's not a table", + __func__, + tableref.field->name); + } + size_t total_keys = 0; + for( size_t i=0; ivar_decl_node), + refer_offset_source(tableref), + gg_cast(SIZE_T, depending_on), + build_int_cst_type(SIZE_T, key_index), + all_keys, + ascending, + duplicates ? integer_one_node : integer_zero_node, + NULL_TREE); + if( alphabet ) + { + pop_program_state(); + } + + free(flattened_ascending); + free(flattened_fields); + + gg_free(ascending); + gg_free(all_keys); + } + +void +parser_file_sort( cbl_file_t *workfile, + bool duplicates, + cbl_alphabet_t *alphabet, + size_t nkeys, + cbl_key_t *keys, + size_t ninput, + cbl_file_t **inputs, + size_t noutput, + cbl_file_t **outputs, + cbl_perform_tgt_t *in_proc, + cbl_perform_tgt_t *out_proc ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // This is the implementation of SORT FORMAT 1 + + // It proceeds in three phases. + + // The first phase is absorbing the input and writing it out to the workfile: + + parser_file_open(workfile, 'w'); + IF( member(workfile, "io_status"), ge_op, build_int_cst_type(INT, FsEofSeq) ) + { + gg_printf("Couldn't open the SORT workfile for writing\n", NULL_TREE); + gg_exit(integer_one_node); + } + ELSE + ENDIF + + if( in_proc && !ninput ) + { + // We are getting our inputs from an input procedure + parser_perform(in_proc, NULL); + } + else if( ninput && !in_proc ) + { + // ninput means there was a USING clause, specifying input files. + + // We are going to transfer the input file[s] to the workfile. The + // transfer will be done so that any newlines in a LINE SEQUENTIAL file + // are skipped, and so that any records that are too long, or too short, + // are all normalized to the format of the SD record. + for(size_t i=0; i var_decl_node), + gg_get_address_of(inputs[i]->var_decl_node), + NULL_TREE); + parser_file_close(inputs[i]); + } + } + else + { + // Having both or neither violates SORT syntax + cbl_internal_error("%s(): syntax error -- both (or neither) USING " + "and input-proc are specified", + __func__); + } + parser_file_close(workfile); + + // At this point, we have workfile of unsorted data. We have a library + // routine that sorts the workfile. It needs the keys: + + // The following is a tad more complex than it needs to be. It's a partial + // clone of the code for handling multiple keys, each of which can have + // multiple fields. + + size_t total_keys = 0; + for( size_t i=0; ivar_decl_node), + build_int_cst_type(SIZE_T, key_index), + all_keys, + ascending, + duplicates ? integer_one_node : integer_zero_node, + NULL_TREE); + if( alphabet ) + { + pop_program_state(); + } + parser_file_close(workfile); + + free(flattened_ascending); + free(flattened_fields); + gg_free(ascending); + gg_free(all_keys); + + // The workfile is sorted. We move to Phase 3 -- transferring the workfile + // to the output. + + if( noutput && !out_proc) + { + // We have a GIVING phrase: + for(size_t i=0; ivar_decl_node), + gg_get_address_of(workfile->var_decl_node), + NULL_TREE); + parser_file_close(outputs[i]); + parser_file_close(workfile); + } + } + else if (!noutput && out_proc) + { + // We are going to transfer the workfile to the output procedures. + parser_file_open(workfile,'r'); + IF( member(workfile, "io_status"), + ge_op, + build_int_cst(INT, FhNotOkay) ) + { + rt_error("Couldn't open workfile for stage-three " + "output in parser_file_sort"); + } + ELSE + { + parser_perform(out_proc, NULL); + parser_file_close(workfile); + } + ENDIF + } + else + { + cbl_internal_error("%s(): syntax error -- both (or neither) GIVING " + "and output-proc are specified", __func__); + } + } + +void +parser_release( cbl_field_t *record_area ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + // When this routine is called, it writes the contents of 'record_area' to the + // workfile specified by the cbl_file_t parent of record_area: + + cbl_file_t *workfile = symbol_record_file(record_area); + + gg_call(VOID, + "__gg__file_write", + gg_get_address_of( workfile->var_decl_node), + member(record_area, "data"), + member(record_area, "capacity"), + integer_zero_node, + integer_minusone_node, + integer_zero_node, + NULL_TREE); // non-random + set_user_status(workfile); + } + +void +parser_return_start( cbl_file_t *workfile, cbl_refer_t into ) + { + Analyze(); + // This function helps implement the COBOL RETURN statement, which is used + // in SORT and MERGE to "return" data from an intermediate sort/merge file + // to SORT/MERGE output procedure. + + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + // We assume that workfile is open. + + workfile->addresses = (cbl_sortreturn_t *)xmalloc(sizeof(cbl_sortreturn_t)); + gg_create_goto_pair(&workfile->addresses->at_end.go_to, + &workfile->addresses->at_end.label); + gg_create_goto_pair(&workfile->addresses->not_at_end.go_to, + &workfile->addresses->not_at_end.label); + gg_create_goto_pair(&workfile->addresses->bottom.go_to, + &workfile->addresses->bottom.label); + + // Read the data from workfile into the SD record position: + cbl_field_t *data_location = symbol_file_record(workfile); + parser_file_read(workfile, data_location, -1 ); + + // And jump to either at_end or not_at_end, depending: + IF( member(workfile, "io_status"), lt_op, build_int_cst(INT, FsEofSeq) ) + { + // The read was successful. We move the result into place + if( into.field ) + { + cbl_field_t *record_area = + cbl_field_of(symbol_at(workfile->default_record)); + parser_move(into, record_area, truncation_e); + } + // And having moved -- or not -- the record, jump to the not-at-end + // imperative + gg_append_statement(workfile->addresses->not_at_end.go_to); + } + ELSE + ENDIF + + IF( member(workfile, "io_status"), lt_op, build_int_cst(INT, FsKeySeq) ) + { + // The read didn't succeed because of an end-of-file condition + gg_append_statement(workfile->addresses->at_end.go_to); + } + ELSE + ENDIF + + // Arriving here means some kind of error condition. So, we don't do the + // move, and we jump to the end of the statement + gg_append_statement(workfile->addresses->bottom.go_to); + } + +void +parser_return_atend( cbl_file_t *workfile ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + // There might or might not be an at_end clause, and it might, or might + // not, appear after a not_at_end clause. If we are appearing after + // a not_at_end clause, we need to finish that clause with a jump to the + // bottom of the logic: + if( !workfile->addresses->not_at_end.label ) + { + // We have been preceded by a not_at_end label. So, we need to + // put in a jump to end those statements: + gg_append_statement(workfile->addresses->bottom.go_to); + } + // And now we place the at_end label: + gg_append_statement(workfile->addresses->at_end.label); + + // And having placed it, NULL it out + workfile->addresses->at_end.label = NULL; + + // The imperative statements of the NOT AT END clause will follow + } + +void +parser_return_notatend( cbl_file_t *workfile ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + // There might or might not be a not_at_end clause, and it might, or might + // not, appear after a at_end clause. If we are appearing after + // a at_end clause, we need to finish that clause with a jump to the + // bottom of the logic: + if( !workfile->addresses->at_end.label ) + { + // We have been preceded by an at_end label. So, we need to + // put in a jump to end those statements: + gg_append_statement(workfile->addresses->bottom.go_to); + } + // And now we place the not_at_end label: + gg_append_statement(workfile->addresses->not_at_end.label); + + // And having placed it, NULL it out + workfile->addresses->not_at_end.label = NULL; + + // The imperative statements of the AT END clause will follow + } + +void +parser_return_finish( cbl_file_t *workfile ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + // If we are preceded by either an at_end or not_at_end clause, we need + // to end those statements with a jump to the bottom: + if( !workfile->addresses->at_end.label || !workfile->addresses->not_at_end.label) + { + gg_append_statement(workfile->addresses->bottom.go_to); + } + + // We need to place labels for clauses that weren't explicitly expressed + // in the COBOL source code. (Both were explicit targets of goto statements + // back in parser_return_start, so we need to place them here if they + // weren't placed elsewhere) + if( workfile->addresses->at_end.label ) + { + gg_append_statement(workfile->addresses->at_end.label); + } + if( workfile->addresses->not_at_end.label ) + { + gg_append_statement(workfile->addresses->not_at_end.label); + } + // And that brings us to the bottom: + gg_append_statement(workfile->addresses->bottom.label); + + free(workfile->addresses); + } + +static tree +gg_array_of_file_pointers( size_t N, + cbl_file_t **files ) + { + tree retval = gg_define_variable(build_pointer_type(cblc_file_p_type_node)); + gg_assign(retval, gg_cast( build_pointer_type(cblc_file_p_type_node), + gg_malloc( build_int_cst_type(SIZE_T, N * sizeof(void *))))); + for(size_t i=0; ivar_decl_node)); + } + return retval; + } + +void +parser_file_merge( cbl_file_t *workfile, + cbl_alphabet_t *alphabet, + size_t nkeys, + cbl_key_t *keys, + size_t ninputs, + cbl_file_t **inputs, + size_t noutputs, + cbl_file_t **outputs, + cbl_perform_tgt_t *out_proc ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + + // Our default file organization is LINE SEQUENTIAL, which spectacularly does + // *not* work for a SORT workfile. + if( workfile->org == file_line_sequential_e ) + { + workfile->org = file_sequential_e; + gg_assign( member(workfile->var_decl_node, "org"), + build_int_cst_type(INT, file_sequential_e)); + } + + size_t total_keys = 0; + for( size_t i=0; iname); + rt_error(ach); + } + ELSE + ENDIF + } + + cbl_field_t *sd_record = symbol_file_record(workfile); + if( alphabet ) + { + push_program_state(); + parser_alphabet_use(*alphabet); + } + gg_call(VOID, + "__gg__merge_files", + gg_get_address_of(workfile->var_decl_node), + build_int_cst_type(SIZE_T, nkeys), + all_keys, + ascending, + build_int_cst_type(SIZE_T, ninputs), + all_files, + NULL_TREE); + if( alphabet ) + { + pop_program_state(); + } + + free(flattened_ascending); + free(flattened_fields); + gg_free(ascending); + gg_free(all_keys); + + parser_file_close(workfile); + for(size_t i=0; ivar_decl_node), + gg_get_address_of(workfile-> var_decl_node), + gg_get_address_of(sd_record-> var_decl_node), + NULL_TREE); + parser_file_close(outputs[i]); + parser_file_close(workfile); + } + } + else if (!noutputs && out_proc) + { + // We are going to transfer the workfile to the output procedures. + parser_file_open(workfile,'r'); + IF( member(workfile, "io_status"), + ge_op, + build_int_cst_type(INT, FhNotOkay) ) + { + rt_error("Couldn't open workfile for" + " stage-three output in parser_file_merge"); + } + ELSE + ENDIF + parser_perform(out_proc, NULL); + parser_file_close(workfile); + } + else + { + cbl_internal_error("%s(): syntax error -- both (or neither) " + "files and output-proc are specified", __func__); + } + } + +void +parser_string_overflow( cbl_label_t *name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + /* + * parser_string_overflow is called 0-2 times before the associated + * parser_string. + */ + + name->structs.unstring + = (cbl_unstring_t *)xmalloc(sizeof(struct cbl_unstring_t) ); + + // Set up the address pairs for this clause + gg_create_goto_pair(&name->structs.unstring->over.go_to, + &name->structs.unstring->over.label); + gg_create_goto_pair(&name->structs.unstring->into.go_to, + &name->structs.unstring->into.label); + gg_create_goto_pair(&name->structs.unstring->bottom.go_to, + &name->structs.unstring->bottom.label); + + // Jump over the [NOT] ON OVERFLOW code that is about to be laid down + gg_append_statement( name->structs.unstring->over.go_to ); + + // Create the label that allows the following code to be executed at + // the appropriate time. + gg_append_statement( name->structs.unstring->into.label ); + } + +void +parser_string_overflow_end( cbl_label_t *name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + gg_append_statement( name->structs.unstring->bottom.go_to ); + } + +void +parser_unstring(cbl_refer_t src, + size_t ndelimited, + cbl_refer_t *delimiteds, + size_t noutputs, + cbl_refer_t *outputs, + cbl_refer_t *delimiters, + cbl_refer_t *counts, + cbl_refer_t pointer, + cbl_refer_t tally, + cbl_label_t *overflow, + cbl_label_t *not_overflow ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + if( overflow ) + { + gg_append_statement(overflow->structs.unstring->over.label); + } + if( not_overflow ) + { + gg_append_statement(not_overflow->structs.unstring->over.label); + } + + cbl_refer_t *delims = (cbl_refer_t *)xmalloc(ndelimited * sizeof(cbl_refer_t)); + char *alls = (char *)xmalloc(ndelimited+1); + + for(size_t i=0; ivar_decl_node), + refer_offset_source(src), + refer_size_source(src), + build_int_cst_type(SIZE_T, ndelimited), + t_alls, + build_int_cst_type(SIZE_T, noutputs), + pointer.field ? gg_get_address_of(pointer.field->var_decl_node) : null_pointer_node, + refer_offset_dest(pointer), + refer_size_dest(pointer), + tally.field ? gg_get_address_of(tally.field->var_decl_node) : null_pointer_node, + refer_offset_dest(tally), + refer_size_dest(tally), + NULL_TREE) + ); + free(alls); + free(delims); + + if( overflow ) + { + // We have an ON OVERFLOW clause: + IF( t_overflow, ne_op, integer_zero_node ) + // And we have an overflow condition + gg_append_statement( overflow->structs.unstring->into.go_to ); + ELSE + ENDIF + } + + if( not_overflow ) + { + // We have a NOT ON OVERFLOW clause: + IF( t_overflow, eq_op, integer_zero_node ) + // And there isn't an overflow condition: + gg_append_statement( not_overflow->structs.unstring->into.go_to ); + ELSE + ENDIF + } + + if( overflow ) + { + gg_append_statement( overflow->structs.unstring->bottom.label ); + free( overflow->structs.unstring ); + } + + if( not_overflow ) + { + gg_append_statement( not_overflow->structs.unstring->bottom.label ); + free( not_overflow->structs.unstring ); + } + } + +void +parser_string( cbl_refer_t tgt, + cbl_refer_t pointer, + size_t nsource, + cbl_string_src_t *sources, + cbl_label_t *overflow, + cbl_label_t *not_overflow ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_END + } + if( overflow ) + { + gg_append_statement(overflow->structs.unstring->over.label); + } + if( not_overflow ) + { + gg_append_statement(not_overflow->structs.unstring->over.label); + } + + // We need an array of nsource+1 integers: + size_t *integers = (size_t *)xmalloc((nsource+1)*sizeof(size_t)); + + // Count up how many treeplets we are going to need: + size_t cblc_count = 2; // tgt and pointer + for(size_t i=0; istructs.unstring->into.go_to ); + ELSE + ENDIF + } + + if( not_overflow ) + { + // We have a NOT ON OVERFLOW clause: + IF( t_overflow, eq_op, integer_zero_node ) + // And there isn't an overflow condition: + gg_append_statement( not_overflow->structs.unstring->into.go_to ); + ELSE + ENDIF + } + + if( overflow ) + { + gg_append_statement( overflow->structs.unstring->bottom.label ); + free( overflow->structs.unstring ); + } + + if( not_overflow ) + { + gg_append_statement( not_overflow->structs.unstring->bottom.label ); + free( not_overflow->structs.unstring ); + } + } + +void +parser_call_exception( cbl_label_t *name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" ") + SHOW_PARSE_TEXT(name->name) + SHOW_PARSE_END + } + + name->structs.call_exception + = (cbl_call_exception_t *)xmalloc(sizeof(struct cbl_call_exception_t) ); + + // Set up the address pairs for this clause + gg_create_goto_pair(&name->structs.call_exception->over.go_to, + &name->structs.call_exception->over.label); + gg_create_goto_pair(&name->structs.call_exception->into.go_to, + &name->structs.call_exception->into.label); + gg_create_goto_pair(&name->structs.call_exception->bottom.go_to, + &name->structs.call_exception->bottom.label); + + // Jump over the [NOT] ON EXCEPTION code that is about to be laid down + // char ach[128]; + // sprintf(ach, "# parser_call_exception %s: over.goto", name->name); + // gg_insert_into_assembler(ach); + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("except over.goto") + SHOW_PARSE_END + } + gg_append_statement( name->structs.call_exception->over.go_to ); + + // Create the label that allows the following code to be executed at + // the appropriate time. + // sprintf(ach, "# parser_call_exception %s: into.label", name->name); + // gg_insert_into_assembler(ach); + gg_append_statement( name->structs.call_exception->into.label ); + } + +void +parser_call_exception_end( cbl_label_t *name ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(name->name) + SHOW_PARSE_END + } + // char ach[128]; + // sprintf(ach, "# parser_call_exception_end %s: bottom.goto", name->name); + // gg_insert_into_assembler(ach); + gg_append_statement( name->structs.call_exception->bottom.go_to ); + } + +static +void +create_and_call(size_t narg, + cbl_ffi_arg_t args[], + tree function_handle, + tree returned_value_type, + cbl_refer_t returned, + cbl_label_t *not_except + ) + { + // We have a good function handle, so we are going to create a call + tree *arguments = NULL; + int *allocated = NULL; + + if(narg) + { + arguments = (tree *)xmalloc(2*narg * sizeof(tree)); + allocated = (int * )xmalloc(narg * sizeof(int)); + } + + // Put the arguments onto the "stack" of calling parameters: + for( size_t i=0; itype == FldLiteralN ) + { + crv = by_value_e; + } + + allocated[i] = 0; + + tree location = gg_define_variable(UCHAR_P, "..location.1", vs_stack); + tree length = gg_define_variable(SIZE_T, "..length.1", vs_stack); + + if( !args[i].refer.field ) + { + // The PARAMETER is OMITTED + arguments[i] = null_pointer_node; + gg_assign(gg_array_value(var_decl_call_parameter_lengths, i), + size_t_zero_node); + continue; + } + + if( refer_is_clean(args[i].refer) ) + { + if( args[i].refer.field->type == FldLiteralA ) + { + crv = by_content_e; + gg_assign(location, + gg_cast(UCHAR_P, build_string_literal(args[i].refer.field->data.capacity, + args[i].refer.field->data.initial))); + gg_assign(length, + build_int_cst_type( SIZE_T, + args[i].refer.field->data.capacity)); + } + else + { + gg_assign(location, + member(args[i].refer.field->var_decl_node, "data")); + gg_assign(length, + member(args[i].refer.field->var_decl_node, "capacity")); + } + } + else + { + gg_assign(location, + qualified_data_source(args[i].refer)), + gg_assign(length, + refer_size_source(args[i].refer)); + } + + switch( crv ) + { + case by_default_e: + gcc_unreachable(); + break; + + case by_reference_e: + { + arguments[i] = location; + + // Pass the pointer to the data location, so that the called program + // can both access and change the data. + break; + } + + case by_content_e: + { + if( (args[i].refer.field->attr & intermediate_e) + && is_valuable(args[i].refer.field->type) ) + { + cbl_unimplemented("CALL USING BY CONTENT would require " + "REPOSITORY PROTOTYPES."); + } + + // BY CONTENT means that the called program gets a copy of the data. + + // We'll free this copy after the called program returns. + + switch(args[i].attr) + { + case address_of_e: + { + // Allocate the memory, and make the copy: + arguments[i] = gg_define_char_star(); + allocated[i] = 1; + gg_assign(arguments[i], gg_malloc(length) ) ; + gg_memcpy(arguments[i], + location, + length); + break; + } + + case length_of_e: + { + // The BY CONTENT LENGTH OF gets passed as an 64-bit big-endian + // value + arguments[i] = gg_define_size_t(); + allocated[i] = 1; + gg_assign(arguments[i], gg_malloc(length) ) ; + gg_call(VOID, + "__gg__copy_as_big_endian", + gg_get_address_of(arguments[i]), + length, + NULL_TREE); + break; + } + + case none_of_e: + { + // Allocate the memory, and make the copy: + arguments[i] = gg_define_char_star(); + allocated[i] = 1; + gg_assign(arguments[i], gg_cast(CHAR_P, gg_malloc(length))) ; + gg_memcpy(arguments[i], location, length); + break; + } + } + break; + } + + case by_value_e: + { + // For BY VALUE, we take whatever we've been given and do our best to + // make a 64-bit value out of it, although we move to 128 bits when + // necessary. + switch(args[i].attr) + { + case address_of_e: + { + arguments[i] = gg_define_size_t(); + gg_assign(arguments[i], gg_cast(SIZE_T, location )); + break; + } + + case length_of_e: + { + arguments[i] = gg_define_size_t(); + gg_assign(arguments[i], gg_cast(SIZE_T, length)); + break; + } + + case none_of_e: + { + assert(args[i].refer.field); + bool as_int128 = false; + if( !(args[i].refer.field->attr & intermediate_e) ) + { + // All temporaries are SIZE_T + if( args[i].refer.field->type == FldFloat + && args[i].refer.field->data.capacity == 16 ) + { + as_int128 = true; + } + else if( args[i].refer.field->type == FldNumericBin5 + && args[i].refer.field->data.digits == 0 + && args[i].refer.field->data.capacity == 16 ) + { + as_int128 = true; + } + else if( args[i].refer.field->data.digits > 18 ) + { + as_int128 = true; + } + } + + if( as_int128 ) + { + arguments[i] = gg_define_variable(INT128); + gg_assign(arguments[i], + gg_cast(INT128, + gg_call_expr( + INT128, + "__gg__fetch_call_by_value_value", + gg_get_address_of(args[i].refer.field->var_decl_node), + refer_offset_source(args[i].refer), + refer_size_source(args[i].refer), + NULL_TREE))); + } + else + { + arguments[i] = gg_define_size_t(); + gg_assign(arguments[i], + gg_cast(SIZE_T, + gg_call_expr( + INT128, + "__gg__fetch_call_by_value_value", + gg_get_address_of(args[i].refer.field->var_decl_node), + refer_offset_source(args[i].refer), + refer_size_source(args[i].refer), + NULL_TREE))); + } + break; + } + } + } + } + // The elements in this array tell the called routine the length of each + // variable. This value is used both to handle ANY LENGTH formal + // parameters, and to provide information to the called program when being + // passed expressions BY VALUE and BY CONTENT + gg_assign(gg_array_value(var_decl_call_parameter_lengths, i),length); + } + + // Let the called program know how many parameters we are passing + gg_assign(var_decl_call_parameter_count, + build_int_cst_type(INT, narg)); + + gg_assign(var_decl_call_parameter_signature, + gg_cast(CHAR_P, function_handle)); + + tree call_expr = gg_call_expr_list( returned_value_type, + function_handle, + narg, + arguments ); + tree returned_value; + if( returned.field ) + { + returned_value = gg_define_variable(returned_value_type); + + // We are expecting a return value of type CHAR_P, SSIZE_T, SIZE_T, + // UINT128 or INT128 + push_program_state(); + gg_assign(returned_value, gg_cast(returned_value_type, call_expr)); + pop_program_state(); + + // Because the CALL had a RETURNING clause, RETURN-CODE doesn't return a + // value. So, we make sure it is zero + gg_assign(var_decl_return_code, build_int_cst_type(SHORT, 0)); + + if( returned_value_type == CHAR_P ) + { + tree returned_location = gg_define_uchar_star(); + tree returned_length = gg_define_size_t(); + // we were given a returned::field, so find its location and length: + gg_assign(returned_location, + gg_add( member(returned.field->var_decl_node, "data"), + refer_offset_dest(returned))); + gg_assign(returned_length, + refer_size_dest(returned)); + + // The returned value is a string of nbytes, which by specification + // has to be at least as long as the returned_length of the target: + IF( returned_value, + eq_op, + gg_cast(returned_value_type, null_pointer_node ) ) + { + // Somebody was discourteous enough to return a NULL pointer + // We'll jam in spaces: + gg_memset( returned_location, + char_nodes[(unsigned char)internal_space], + returned_length ); + } + ELSE + { + // There is a valid pointer. Do the assignment. + move_tree(returned.field, + refer_offset_dest(returned), + returned_value, + integer_one_node); + } + ENDIF + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER("returned value: ", returned, "") + TRACE1_END + } + } + else if( returned_value_type == SSIZE_T + || returned_value_type == SIZE_T + || returned_value_type == INT128 + || returned_value_type == UINT128) + { + // We got back a 64-bit or 128-bit integer. The called and calling + // programs have to agree on size, but other than that, integer numeric + // types are converted one to the other. + gg_call(VOID, + "__gg__int128_to_qualified_field", + gg_get_address_of(returned.field->var_decl_node), + refer_offset_dest(returned), + refer_size_dest(returned), + gg_cast(INT128, returned_value), + member(returned.field->var_decl_node, "rdigits"), + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER("returned value: ", returned, "") + TRACE1_END + } + } + else if( returned_value_type == FLOAT + || returned_value_type == DOUBLE + || returned_value_type == FLOAT128) + { + tree returned_location = gg_define_uchar_star(); + tree returned_length = gg_define_size_t(); + // we were given a returned::field, so find its location and length: + gg_assign(returned_location, + qualified_data_source(returned)); + gg_assign(returned_length, + refer_size_source(returned)); + + // We are doing float-to-float, and we require that those be identical + // one the caller and callee sides. + gg_memcpy( returned_location, + gg_get_address_of(returned_value), + returned_length); + + TRACE1 + { + TRACE1_HEADER + TRACE1_REFER("returned value: ", returned, "") + TRACE1_END + } + } + else + { + cbl_internal_error( + "%s(): What in the name of Nero's fiddle are we doing here?", + __func__); + } + } + else + { + // Because no explicit returning value is expected, we switch to + // the IBM default behavior, where the returned INT value is assigned + // to our RETURN-CODE: + returned_value = gg_define_variable(SHORT); + + // Before doing the call, we save the COBOL program_state: + push_program_state(); + gg_assign(returned_value, gg_cast(SHORT, call_expr)); + // And after the call, we restore it: + pop_program_state(); + + // We know that the returned value is a 2-byte little-endian INT: + gg_assign( var_decl_return_code, + returned_value); + TRACE1 + { + TRACE1_HEADER + gg_printf("returned value: %d", + gg_cast(INT, var_decl_return_code), + NULL_TREE); + TRACE1_END + } + } + + for( size_t i=0; istructs.call_exception->into.go_to ); + } + } + +void +parser_call( cbl_refer_t name, + cbl_refer_t returned, // This is set by RETURNING clause + size_t narg, + cbl_ffi_arg_t args[], + cbl_label_t *except, + cbl_label_t *not_except, + bool /*is_function*/) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD( " calling ", name.field) + if( except ) + { + SHOW_PARSE_TEXT(" - except is ") + SHOW_PARSE_TEXT(except->name) + } + if( not_except ) + { + SHOW_PARSE_TEXT(" - not_except is ") + SHOW_PARSE_TEXT(not_except->name) + } + SHOW_PARSE_TEXT(" (") + for(size_t i=0; istructs.call_exception->over.label); + } + + // Likewise, for a NOT ON EXCEPTION + if( not_except ) + { + SHOW_PARSE + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("not_except over.label:") + } + gg_append_statement(not_except->structs.call_exception->over.label); + } + + // We are getting close to establishing the function_type. To do that, + // we want to establish the function's return type. + +// gg_push_context(); + size_t nbytes; + tree returned_value_type = tree_type_from_field_type(returned.field, nbytes); + + tree function_handle = function_handle_from_name( name, + returned_value_type); + if( (use_static_call() && is_literal(name.field)) + || (name.field && name.field->type == FldPointer) ) + { + // If these conditions are true, then we know we have a good + // function_handle, and we don't need to check + create_and_call(narg, + args, + function_handle, + returned_value_type, + returned, + not_except + ); + } + else + { + // We might not have a good handle, so we have to check: + IF( function_handle, + ne_op, + gg_cast(TREE_TYPE(function_handle), null_pointer_node) ) + { + create_and_call(narg, + args, + function_handle, + returned_value_type, + returned, + not_except + ); + } + ELSE + { + // We have a bad function pointer, which is the except condition: + parser_exception_raise(ec_program_not_found_e); + if( except ) + { + // We have an ON EXCEPT clause: + gg_append_statement( except->structs.call_exception->into.go_to ); + // Because there is an ON EXCEPTION clause, suppress DECLARATIVE + // processing + gg_assign(var_decl_exception_code, integer_zero_node); + } + else + { + tree mangled_name = gg_define_variable(CHAR_P); + + gg_call(VOID, + "__gg__just_mangle_name", + (name.field->var_decl_node + ? gg_get_address_of(name.field->var_decl_node) + : null_pointer_node), + gg_get_address_of( mangled_name), + NULL_TREE); + + gg_printf("WARNING: %s:%d \"CALL %s\" not found" + " with no \"CALL ON EXCEPTION\" phrase\n", + gg_string_literal(current_filename.back().c_str()), + build_int_cst_type(INT, CURRENT_LINE_NUMBER), + mangled_name, + NULL_TREE); + } + } + ENDIF + } + + // Clean up the label bookkeeping + if( except ) + { + gg_append_statement( except->structs.call_exception->bottom.label ); + free( except->structs.call_exception ); + } + if( not_except ) + { + gg_append_statement( not_except->structs.call_exception->bottom.label ); + free( not_except->structs.call_exception ); + } +// gg_pop_context(); + + } + +// Set global variable to use alternative ENTRY point. +void +parser_entry_activate( size_t iprog, const cbl_label_t *declarative ) + { + assert(iprog == symbol_elem_of(declarative)->program); + } + +// Define ENTRY point with alternative LINKAGE +void +parser_entry( cbl_field_t */*name*/, size_t /*narg*/, cbl_ffi_arg_t */*args*/ ) + { + } + +void +parser_bitop( struct cbl_field_t *tgt, // tgt has to be a FldConditional + struct cbl_field_t *a, // is modified by SET,CLEAR + enum bitop_t op, + size_t bitmask ) + { + Analyze(); + // This routine is designed to set, clear, and test BITMASK bits in the + // A operand. For ON and OFF, it sets tgt, a FldConditional, to TRUE or FALSE + + // This is clumsy: The ops[] array has to match bitop_t + static const char *ops[] = { "SET", "CLEAR", "ON", "OFF", + "AND", "OR", "XOR" }; + gcc_assert( op < COUNT_OF(ops) ); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD( " switch: ", a) + fprintf(stderr, " mask: %lx", bitmask); + fprintf(stderr, " op: %s", ops[op]); + SHOW_PARSE_FIELD( " target ", tgt) + SHOW_PARSE_END + } + + if(tgt && tgt->type != FldConditional) + { + fprintf(stderr, + "%s(): The target %s has to be a FldConditional, not %s\n", + __func__, + tgt->name, + cbl_field_type_str(tgt->type)); + gcc_unreachable(); + } + + switch(op) + { + case bit_set_op: + case bit_clear_op: + // For set_on and set_off operations, the tgt is superfluous, so I + // did this code just in case the parser doesn't give us anything + // to set + gg_call(BOOL, + "__gg__bitop", + gg_get_address_of(a->var_decl_node), + build_int_cst_type(INT, op), + build_int_cst_type(SIZE_T, bitmask), + NULL_TREE ); + break; + + case bit_on_op: + case bit_off_op: + gg_assign( tgt->var_decl_node, + gg_call_expr( BOOL, + "__gg__bitop", + gg_get_address_of(a->var_decl_node), + build_int_cst_type(INT, op), + build_int_cst_type(SIZE_T, bitmask), + NULL_TREE)); + break; + + case bit_and_op: + case bit_or_op: + case bit_xor_op: + fprintf(stderr, + "%s(): The %s operation is not valid\n", + __func__, + ops[op]); + gcc_unreachable(); + break; + } + + TRACE1 + { + TRACE1_HEADER + //TRACE1_FIELD_INFO( " target ", tgt) + TRACE1_FIELD_INFO( " a ", a) + TRACE1_END + } + } + +void +parser_bitwise_op(struct cbl_field_t *tgt, + struct cbl_field_t *a, + enum bitop_t op, + size_t bitmask ) + { + Analyze(); + // This routine is a specialized TGT = A op (size_t) bitmask, where OP is + // AND, OR, or XOR. A should be an integer type. tgt should be a valid target + // for a move where an integer is the sender. + + // SET and CLEAR are straightforward. ON returns true if any bitmask bit is + // one in 'A'. OFF returns true if any bitmask bit in 'A' is zero. + + // This is clumsy: The ops[] array has to match bitop_t + static const char *ops[] = { "SET", "CLEAR", "ON", "OFF", + "AND", "OR", "XOR" }; + gcc_assert( op < COUNT_OF(ops) ); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD( " switch: ", a) + fprintf(stderr, " mask: %lx", bitmask); + fprintf(stderr, " op: %s", ops[op]); + SHOW_PARSE_FIELD( " target ", tgt) + SHOW_PARSE_END + } + + if( tgt && !is_valuable(tgt->type) && tgt->type != FldLiteralN) + { + fprintf(stderr, + "%s(): The target %s has to be is_valuable, not %s\n", + __func__, + tgt->name, + cbl_field_type_str(tgt->type)); + gcc_unreachable(); + } + + switch(op) + { + case bit_set_op: + case bit_clear_op: + case bit_on_op: + case bit_off_op: + fprintf(stderr, + "%s(): The %s operation is not valid\n", + __func__, + ops[op]); + gcc_unreachable(); + break; + + case bit_and_op: + case bit_or_op: + case bit_xor_op: + gg_call(VOID, + "__gg__bitwise_op", + gg_get_address_of(tgt->var_decl_node), + gg_get_address_of(a->var_decl_node), + build_int_cst_type(INT, op), + build_int_cst_type(SIZE_T, bitmask), + NULL_TREE ); + break; + } + + TRACE1 + { + TRACE1_HEADER + //TRACE1_FIELD_INFO( " target ", tgt) + TRACE1_FIELD_INFO( " a ", a) + TRACE1_END + } + } + +void +parser_set_pointers( size_t ntgt, cbl_refer_t *tgts, cbl_refer_t source ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" source ", source.field); + char ach[128]; + sprintf(ach, + " source.addr_of %s", + source.addr_of ? "TRUE" : "FALSE" ); + SHOW_PARSE_TEXT(ach); + for( size_t i=0; itype == FldAlphanumeric + || source.field->type == FldLiteralA)) + { + // This is something like SET varp TO ENTRY "ref". + tree function_handle = function_handle_from_name(source, + COBOL_FUNCTION_RETURN_TYPE); + gg_memcpy(qualified_data_dest(tgts[i]), + gg_get_address_of(function_handle), + build_int_cst_type(SIZE_T, sizeof(void *))); + } + else + { + if( !tgts[i].addr_of ) + { + // When not ADDRESS OF TARGET, the variable must be a POINTER + gcc_assert( tgts[i].field->type == FldPointer ); + } + else + { + // When ADDRESS OF TARGET, the target must be linkage or based + gcc_assert( tgts[i].field->attr & (linkage_e | based_e) ); + } + + gg_call( VOID, + "__gg__set_pointer", + gg_get_address_of(tgts[i].field->var_decl_node), + refer_offset_dest(tgts[i]), + build_int_cst_type(INT, tgts[i].addr_of ? REFER_T_ADDRESS_OF : 0), + source.field ? gg_get_address_of(source.field->var_decl_node) : null_pointer_node, + refer_offset_source(source), + build_int_cst_type(INT, source.addr_of ? REFER_T_ADDRESS_OF : 0), + NULL_TREE + ); + + if( tgts[i].addr_of ) + { + // When SET ADDRESS OF TARGET TO ..., the library call sets + // tgts[i].field->data. We need to propogate the data+offset + // through the level01 variable's children: + propogate_linkage_offsets(tgts[i].field, + member(tgts[i].field->var_decl_node, "data")); + } + } + } + } +typedef struct hier_node + { + size_t our_index; // In the symbol table + bool common; + struct hier_node *parent_node; + char *name; + std::vectorchild_nodes; + + hier_node() : + our_index(0), + common(false), + parent_node(NULL) + {} + } hier_node; + +static hier_node * +find_hier_node( const std::unordered_map &node_map, + size_t program_index) + { + std::unordered_map::const_iterator it = + node_map.find(program_index); + if( it == node_map.end() ) + { + return NULL; + } + return it->second; + } + +static bool +sort_by_hier_name(const hier_node *a, const hier_node *b) + { + return strcmp(a->name, b->name) < 0; + } + +static void +find_uncles(const hier_node *node, std::vector &uncles) + { + const hier_node *parent = node->parent_node; + if( parent ) + { + for(size_t i=0; ichild_nodes.size(); i++) + { + if( parent->child_nodes[i] != node ) + { + if( parent->child_nodes[i]->common ) + { + uncles.push_back(parent->child_nodes[i]); + } + } + } + find_uncles(parent, uncles); + } + } + +void +parser_program_hierarchy( const struct cbl_prog_hier_t& hier ) + { + Analyze(); + /* The complication in this routine is that it gets called near the end + of every program-id. And it keeps growing. The reason is because the + parser doesn't know when it is working on the last program of a list of + nested programs. So, we just do what we need to do, and we keep track + of what we've already built so that we don't build it more than once. + */ + SHOW_PARSE + { + SHOW_PARSE_HEADER + if( gg_trans_unit.function_stack.size() != 1 ) + { + SHOW_PARSE_TEXT("Ending a nested function") + } + else + { + for( size_t i=0; i> map_of_lists; + std::unordered_map node_map; + std::vector nodes; + + // We need to avoid duplicating names, because a direct child's name takes + // precedence over a COMMON name above us in the hierarchy: + + std::unordered_map>map_of_sets; + + // We need to build a tree out of the hierarchical structure: + // Create, essentially, a root node: + hier_node *zero_node = new hier_node; + nodes.push_back(zero_node); + node_map[0] = nodes.back(); + + // Pass 1: Create a node for every program: + for( size_t i=0; iour_index = hier.labels[i].ordinal; + new_node->common = hier.labels[i].label.common; + new_node->name = cobol_name_mangler(hier.labels[i].label.name); + nodes.push_back(new_node); + node_map[hier.labels[i].ordinal] = nodes.back(); + } + + // Pass 2: populate each node with their parent and children: + for( size_t i=0; iparent_node = parent_node; + parent_node->child_nodes.push_back(child_node); + } + + // We now build the lists of routines that can be called from every routine + + // We are going to create one vector of hier_nodes for each routine: + + for(size_t i=0; iour_index; + const hier_node *caller_node = nodes[i]; + for(size_t j=0; jchild_nodes.size(); j++) + { + map_of_lists[caller].push_back(caller_node->child_nodes[j]); + map_of_sets[caller].insert(caller_node->child_nodes[j]->name); + } + + // Sibling routines marked COMMON, and siblings of ancestors marked COMMON + // are also accessible by us. Go find them. + std::vectoruncles; + find_uncles(nodes[i], uncles); + for( size_t i=0; iname) == map_of_sets[caller].end() ) + { + // We have a COMMON uncle or sibling we haven't seen before. + map_of_lists[caller].push_back(uncle); + } + } + } + + // Having created lists of callables for each caller, we want to sort each + // of those lists to make it easier to bsearch things in them later: + for( std::unordered_map>::iterator mol = map_of_lists.begin(); + mol != map_of_lists.end(); + mol++ ) + { + std::sort(mol->second.begin(), mol->second.end(), sort_by_hier_name); + } + + // Having built the lists of lists, start pulling them apart + + tree function_type = + build_varargs_function_type_array( SIZE_T, + 0, // No parameters yet + NULL); // And, hence, no types + tree pointer_type = build_pointer_type(function_type); + + static std::unordered_setcallers; + + for( std::unordered_map>::const_iterator mol = map_of_lists.begin(); + mol != map_of_lists.end(); + mol++ ) + { + size_t caller = mol->first; + if( caller != 0 ) + { + if( callers.find(caller) == callers.end() ) + { + // We haven't seen this caller before + callers.insert(caller); + + char ach[2*sizeof(cbl_name_t)]; + tree names_table_type = build_array_type_nelts(CHAR_P, mol->second.size()+1); + sprintf(ach, "..our_accessible_functions_%ld", caller); + tree the_names_table = gg_define_variable(names_table_type, ach, vs_file_static); + + // Here is where we build a table out of constructors: + tree constructed_array_type = build_array_type_nelts(pointer_type, mol->second.size()); + sprintf(ach, "..our_constructed_table_%ld", caller); + tree the_constructed_table = gg_define_variable(constructed_array_type, ach, vs_file_static); + + tree constr_names = make_node(CONSTRUCTOR); + TREE_TYPE(constr_names) = names_table_type; + TREE_STATIC(constr_names) = 1; + TREE_CONSTANT(constr_names) = 1; + + tree constr = make_node(CONSTRUCTOR); + TREE_TYPE(constr) = constructed_array_type; + TREE_STATIC(constr) = 1; + TREE_CONSTANT(constr) = 1; + + int i=0; + for( std::vector::const_iterator callee = mol->second.begin(); + callee != mol->second.end(); + callee++ ) + { + sprintf(ach, "%s.%ld", (*callee)->name, (*callee)->parent_node->our_index); + + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr_names), + build_int_cst_type(SIZE_T, i), + build_string_literal(ach)); + + // Build the constructor element for that function: + tree function_decl = build_fn_decl (ach, function_type); + tree addr_expr = build1(ADDR_EXPR, pointer_type, function_decl); + + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + build_int_cst_type(SIZE_T, i), + addr_expr); + + i++; + } + // Terminate the names table with NULL + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr_names), + build_int_cst_type(SIZE_T, i), + null_pointer_node); + + DECL_INITIAL(the_names_table) = constr_names; + DECL_INITIAL(the_constructed_table) = constr; + + // And put a pointer to that table into the file-static variable set aside + // for it: + sprintf(ach, "..accessible_program_list_%ld", caller); + tree accessible_list_var_decl = gg_trans_unit_var_decl(ach); + gg_assign( accessible_list_var_decl, gg_get_address_of(the_names_table) ); + + sprintf(ach, "..accessible_program_pointers_%ld", caller); + tree accessible_programs_decl = gg_trans_unit_var_decl(ach); + gg_assign( accessible_programs_decl, gg_get_address_of(the_constructed_table) ); + } + } + } + gg_append_statement(label_list_back_goto); + gg_append_statement(skipper_label); + } + +void +parser_set_handled(ec_type_t ec_handled) + { + if( mode_syntax_only() ) return; + SHOW_PARSE + { + SHOW_PARSE_HEADER + char ach[64]; + sprintf(ach, "ec_type_t: 0x%lx", size_t(ec_handled)); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + if( gg_trans_unit.function_stack.size() ) + { + if( ec_handled ) + { + // We assume that exception_handled is zero, always. We only make it + // non-zero when something needs to be done. __gg__match_exception is + // in charge of setting it back to zero. + gg_assign(var_decl_exception_handled, + build_int_cst_type(INT, (int)ec_handled)); + } + } + else + { + yywarn("parser_set_handled() called between programs"); + } + } + +void +parser_set_file_number(int file_number) + { + if( mode_syntax_only() ) return; + SHOW_PARSE + { + SHOW_PARSE_HEADER + char ach[32]; + sprintf(ach, "file number: %d", file_number); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + if( gg_trans_unit.function_stack.size() ) + { + gg_assign(var_decl_exception_file_number, + build_int_cst_type(INT, file_number)); + } + else + { + yywarn("parser_set_file_number() called between programs"); + } + } + +void +parser_set_numeric(struct cbl_field_t *tgt, ssize_t value) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" set ") + SHOW_PARSE_TEXT(tgt->name) + SHOW_PARSE_TEXT(" to ") + char ach[32]; + sprintf(ach, "%ld", value); + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_END + } + + gg_call(VOID, + "__gg__int128_to_field", + gg_get_address_of(tgt->var_decl_node), + build_int_cst_type(INT128, value), + integer_zero_node, + build_int_cst_type(INT, truncation_e), + null_pointer_node, + NULL_TREE ); + } + +static void +stash_exceptions( const cbl_enabled_exceptions_array_t *enabled ) + { + // We need to create a static array of bytes + size_t narg = enabled->nbytes(); + unsigned char *p = (unsigned char *)(enabled->ecs); + + static size_t prior_narg = 0; + static size_t max_narg = 128; + static unsigned char *prior_p = (unsigned char *)xmalloc(max_narg); + + bool we_got_new_data = false; + if( prior_narg != narg ) + { + we_got_new_data = true; + } + else + { + // The narg counts are the same. + for(size_t i=0; i max_narg ) + { + max_narg = narg; + prior_p = (unsigned char *)xrealloc(prior_p, max_narg); + } + + memcpy(prior_p, p, narg); + + static int count = 1; + + tree array_of_chars_type; + tree array_of_chars; + + if( narg ) + { + char ach[32]; + sprintf(ach, "_ec_array_%d", count++); + array_of_chars_type = build_array_type_nelts(UCHAR, narg); + + // We have the array. Now we need to build the constructor for it + tree constr = make_node(CONSTRUCTOR); + TREE_TYPE(constr) = array_of_chars_type; + TREE_STATIC(constr) = 1; + TREE_CONSTANT(constr) = 1; + + for(size_t i=0; inec), + narg ? gg_get_address_of(array_of_chars) : null_pointer_node, + NULL_TREE); + } + } + +static void +store_location_stuff(const cbl_name_t statement_name) + { + if( exception_location_active && !current_declarative_section_name() ) + { + // We need to establish some stuff for EXCEPTION- function processing + gg_assign(var_decl_exception_source_file, + gg_string_literal(current_filename.back().c_str())); + + gg_assign(var_decl_exception_program_id, + gg_string_literal(current_function->our_unmangled_name)); + + if( strstr(current_function->current_section->label->name, "_implicit") + != current_function->current_section->label->name ) + { + gg_assign(var_decl_exception_section, + gg_string_literal(current_function->current_section->label->name)); + } + else + { + gg_assign(var_decl_exception_section, + gg_cast(build_pointer_type(CHAR_P),null_pointer_node)); + } + + if( strstr(current_function->current_paragraph->label->name, "_implicit") + != current_function->current_paragraph->label->name ) + { + gg_assign(var_decl_exception_paragraph, + gg_string_literal(current_function->current_paragraph->label->name)); + } + else + { + gg_assign(var_decl_exception_paragraph, + gg_cast(build_pointer_type(CHAR_P), null_pointer_node)); + } + + gg_assign(var_decl_exception_source_file, + gg_string_literal(current_filename.back().c_str())); + gg_assign(var_decl_exception_line_number, build_int_cst_type(INT, + CURRENT_LINE_NUMBER)); + gg_assign(var_decl_exception_statement, gg_string_literal(statement_name)); + } + } + +void +parser_exception_prepare( const cbl_name_t statement_name, + const cbl_enabled_exceptions_array_t *enabled ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(enabled->nec? " stashing " : " skipping ") + SHOW_PARSE_TEXT(statement_name) + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + if( enabled->nec ) + { + if( gg_trans_unit.function_stack.size() ) + { + stash_exceptions(enabled); + store_location_stuff(statement_name); + } + else + { + yywarn("parser_exception_prepare() called between programs"); + } + } + } + +void +parser_exception_clear() + { + if( mode_syntax_only() ) return; + + Analyze(); + gg_assign(var_decl_exception_code, integer_zero_node); + } + +void +parser_exception_raise(ec_type_t ec) + { + Analyze(); + if( ec == ec_none_e ) + { + gg_call(VOID, + "__gg__set_exception_code", + integer_zero_node, + integer_one_node, + NULL_TREE); + } + else + { + set_exception_code_func(ec, __LINE__, 1); + } + } + +void +parser_match_exception(cbl_field_t *index, + cbl_field_t *blob ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" index ", index) + SHOW_PARSE_INDENT + if( blob ) + { + SHOW_PARSE_FIELD("blob ", blob) + } + else + { + SHOW_PARSE_TEXT("blob is NULL") + } + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_FIELD("index ", index, "") + TRACE1_INDENT + TRACE1_TEXT("blob ") + if( blob ) + { + TRACE1_TEXT(blob->name) + } + else + { + TRACE1_TEXT("is NULL") + } + TRACE1_END + } + + gg_call(VOID, + "__gg__match_exception", + gg_get_address_of(index->var_decl_node), + blob ? blob->var_decl_node : null_pointer_node, + NULL_TREE); + + TRACE1 + { + static tree index_val = gg_define_variable(INT, "..pme_index", vs_file_static); + get_binary_value(index_val, NULL, index, size_t_zero_node); + TRACE1_INDENT + gg_printf("returned value is 0x%x (%d)", index_val, index_val, NULL_TREE); + TRACE1_END + } + } + +void +parser_check_fatal_exception() + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" Check for fatal EC...") + SHOW_PARSE_END + } + gg_call(VOID, + "__gg__check_fatal_exception", + NULL_TREE); + } + +void +parser_clear_exception() + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT(" Clear raised EC...") + SHOW_PARSE_END + } + gg_call(VOID, "__gg__clear_exception", NULL_TREE); + } + +void +parser_exception_file( cbl_field_t *tgt, cbl_file_t *file) + { + Analyze(); + gg_call(VOID, + "__gg__func_exception_file", + gg_get_address_of(tgt->var_decl_node), + file ? gg_get_address_of(file->var_decl_node) : null_pointer_node, + NULL_TREE); + } + +void +parser_file_stash( struct cbl_file_t *file ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + if(file) + { + SHOW_PARSE_TEXT(" "); + SHOW_PARSE_TEXT(file->name); + } + else + { + SHOW_PARSE_TEXT(" *file is NULL ") + } + SHOW_PARSE_END + } + + if( file ) + { + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("parser_file_stash of ") + TRACE1_TEXT(file->name); + TRACE1_END + } + + gg_call(VOID, + "__gg__file_stash", + gg_get_address_of(file->var_decl_node), + NULL_TREE); + } + else + { + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT("parser_file_stash of NULL ") + TRACE1_END + } + + gg_call(VOID, + "__gg__file_stash", + null_pointer_node, + NULL_TREE); + } + } + +static void +hijack_for_development(const char *funcname) + { + /* + + To make sure that things like global symbols and whatnot get initialized, you + should probably create a source file that looks like this: + + identification division. + program-id. prog. + procedure division. + call "dubner". + end program prog. + identification division. + program-id. dubner. + procedure division. + goback. + end program dubner. + + The first program will cause all of the parser_enter_program() and + parser_division(procedure_div_e) stuff to be initialized. The second program, + named "dubner", will be hijacked and bring you here. */ + + // Assume that funcname is lowercase with no hyphens + enter_program_common(funcname, funcname); + parser_display_literal("You have been hijacked by a program named \"dubner\""); + gg_insert_into_assembler("# HIJACKED DUBNER CODE START"); + + for(int i=0; i<10; i++) + { + char ach[64]; + sprintf(ach, "Hello, world - %d", i+1); + + gg_call(VOID, + "puts", + build_string_literal(strlen(ach)+1, ach), + NULL_TREE); + } + + gg_insert_into_assembler("# HIJACKED DUBNER CODE END"); + gg_return(0); + } + +static void +conditional_abs(tree source, cbl_field_t *field) + { + Analyze(); + if( !(field->attr & signable_e) ) + { + gg_assign(source, gg_abs(source)); + } + } + +static bool +mh_identical(cbl_refer_t &destref, + cbl_refer_t &sourceref, + TREEPLET &tsource) + { + // Check to see if the two variables are identical types, thus allowing + // for a simple byte-for-byte copy of the data areas: + bool moved = false; + if( destref.field->type == sourceref.field->type + && destref.field->data.capacity == sourceref.field->data.capacity + && destref.field->data.digits == sourceref.field->data.digits + && destref.field->data.rdigits == sourceref.field->data.rdigits + && (destref.field->attr & (signable_e|separate_e|leading_e)) + == (sourceref.field->attr & (signable_e|separate_e|leading_e)) + && !destref.field->occurs.depending_on + && !sourceref.field->occurs.depending_on + && !destref.refmod.from + && !sourceref.refmod.len + && !(destref.field->attr & intermediate_e) // variables with variable + && !(sourceref.field->attr & intermediate_e) // capacities have to be + && !(destref.field->attr & any_length_e) // handled elsewhere + && !(sourceref.field->attr & any_length_e) + ) + { + // The source and destination are identical in type + if( (sourceref.field->attr & intermediate_e) || !symbol_find_odo(sourceref.field) ) + { + Analyze(); + // Source doesn't have a depending_on clause + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("mh_identical()"); + } + gg_memcpy(gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)), + gg_add(member(sourceref.field->var_decl_node, "data"), + tsource.offset), + build_int_cst_type(SIZE_T, sourceref.field->data.capacity)); + moved = true; + } + } + return moved; + } + +static bool +mh_source_is_literalN(cbl_refer_t &destref, + cbl_refer_t &sourceref, + bool check_for_error, + cbl_round_t rounded, + tree size_error) + { + bool moved = false; + if( sourceref.field->type == FldLiteralN ) + { + Analyze(); + switch( destref.field->type ) + { + case FldGroup: + case FldAlphanumeric: + { + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("mh_source_is_literalN: __gg__psz_to_alpha_move") + } + + static char *buffer = NULL; + static size_t buffer_size = 0; + raw_to_internal(&buffer, + &buffer_size, + sourceref.field->data.initial, + strlen(sourceref.field->data.initial)); + gg_call(VOID, + "__gg__psz_to_alpha_move", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + refer_size_dest(destref), + gg_string_literal(buffer), + build_int_cst_type(SIZE_T, strlen(sourceref.field->data.initial)), + NULL_TREE); + moved = true; + break; + } + + case FldPointer: + case FldIndex: + { + // We know this is a move to an eight-byte value: + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("mh_source_is_literalN: pointer/index") + } + + if( sourceref.field->data.capacity < 8 ) + { + // There are too few bytes in sourceref + if( sourceref.field->attr & signable_e ) + { + static tree highbyte = gg_define_variable(UCHAR, "..mh_litN_highbyte", vs_file_static); + // Pick up the source byte that has the sign bit. + gg_assign(highbyte, + gg_get_indirect_reference(gg_add(member(sourceref.field->var_decl_node, + "data"), + build_int_cst_type(SIZE_T, + sourceref.field->data.capacity-1)), + integer_zero_node)); + IF( gg_bitwise_and(highbyte, build_int_cst_type(UCHAR, 0x80)), + eq_op, + build_int_cst_type(UCHAR, 0x80) ) + { + // We are dealing with a negative number + gg_memset(gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)), + build_int_cst_type(UCHAR, 0xFF), + build_int_cst_type(SIZE_T, 8)); + } + ELSE + gg_memset(gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)), + build_int_cst_type(UCHAR, 0x00), + build_int_cst_type(SIZE_T, 8)); + ENDIF + } + else + { + // The too-short source is positive. + gg_memset(gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)), + build_int_cst_type(UCHAR, 0x00), + build_int_cst_type(SIZE_T, 8)); + } + } + + tree literalN_value = get_literalN_value(sourceref.field); + scale_by_power_of_ten_N(literalN_value, -sourceref.field->data.rdigits); + gg_memcpy(gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)), + gg_get_address_of(literalN_value), + build_int_cst_type(SIZE_T, sourceref.field->data.capacity)); + moved = true; + + break; + } + + case FldNumericBin5: + { + // We are moving from a FldLiteralN (which we know has no subscripts or + // refmods), to a NumericBin5, which might. + + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("mh_source_is_literalN: FldNumericBin5") + } + + // For now, we are ignoring intermediates: + assert( !(destref.field->attr & intermediate_e) ); + + int bytes_needed = std::max(destref.field->data.capacity, + sourceref.field->data.capacity); + tree calc_type = tree_type_from_size(bytes_needed, + sourceref.field->attr & signable_e); + tree dest_type = tree_type_from_size( destref.field->data.capacity, + destref.field->attr & signable_e); + + // Pick up the source data. + tree source = gg_define_variable(calc_type); + gg_assign(source, gg_cast(calc_type, sourceref.field->data_decl_node)); + + // Take the absolute value, if the destination is not signable + conditional_abs(source, destref.field); + + // See if it needs to be scaled: + scale_by_power_of_ten_N( + source, + destref.field->data.rdigits-sourceref.field->data.rdigits); + + if( check_for_error && size_error ) + { + Analyzer.Message("Check to see if result fits"); + if( destref.field->data.digits ) + { + __int128 power_of_ten = get_power_of_ten(destref.field->data.digits); + IF( gg_abs(source), ge_op, build_int_cst_type(calc_type, + power_of_ten) ) + { + gg_assign(size_error, gg_bitwise_or(size_error, integer_one_node)); + } + ELSE + ENDIF + } + } + + Analyzer.Message("Move to destination location"); + tree dest_location = gg_indirect( + gg_cast(build_pointer_type(dest_type), + gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)))); + gg_assign(dest_location, gg_cast(dest_type, source)); + moved = true; + break; + } + + case FldNumericDisplay: + case FldNumericBinary: + case FldNumericEdited: + case FldPacked: + { + static tree berror = gg_define_variable(INT, "..mh_litN_berror", vs_file_static); + gg_assign(berror, integer_zero_node); + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("calling get_literalN_value ") + } + tree literalN_value = get_literalN_value(sourceref.field); + + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("calling __gg__int128_to_qualified_field ") + } + + gg_call(INT, + "__gg__int128_to_qualified_field", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + refer_size_dest(destref), + gg_cast(INT128, literalN_value), + build_int_cst_type(INT, sourceref.field->data.rdigits), + build_int_cst_type(INT, rounded), + gg_get_address_of(berror), + NULL_TREE); + + if( size_error ) + { + IF( berror, ne_op, integer_zero_node ) + { + gg_assign(size_error, gg_bitwise_or(size_error, integer_one_node)); + } + ELSE + ENDIF + } + moved = true; + break; + } + + case FldAlphaEdited: + { + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT(" FldAlphaEdited") + } + gg_call(VOID, + "__gg__string_to_alpha_edited_ascii", + gg_add( member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref) ), + gg_string_literal(sourceref.field->data.initial), + build_int_cst_type(INT, strlen(sourceref.field->data.initial)), + gg_string_literal(destref.field->data.picture), + NULL_TREE); + moved = true; + break; + } + + case FldFloat: + { + tree tdest = gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref) ); + switch( destref.field->data.capacity ) + { + // For some reason, using FLOAT128 in the build_pointer_type causes + // a SEGFAULT. So, we'll use other types with equivalent sizes. I + // am speculating that the use of floating-point types causes the -O0 + // compilation to move things using the mmx registers. So, I am using + // intxx types in the hope that they are simpler. + case 4: + { + // The following generated code is the exact equivalent + // of the C code: + // *(float *)dest = (float)data.value + _Float32 src = (_Float32)sourceref.field->data.value; + tree tsrc = build_string_literal(sizeof(src), (char *)&src); + gg_assign(gg_indirect(gg_cast(build_pointer_type(INT), tdest)), + gg_indirect(gg_cast(build_pointer_type(INT), tsrc ))); + break; + } + case 8: + { + _Float64 src = (_Float64)sourceref.field->data.value; + tree tsrc = build_string_literal(sizeof(src), (char *)&src); + gg_assign(gg_indirect(gg_cast(build_pointer_type(LONG), tdest)), + gg_indirect(gg_cast(build_pointer_type(LONG), tsrc ))); + break; + } + case 16: + { + _Float128 src = (_Float128)sourceref.field->data.value; + tree tsrc = build_string_literal(sizeof(src), (char *)&src); + gg_assign(gg_indirect(gg_cast(build_pointer_type(INT128), tdest)), + gg_indirect(gg_cast(build_pointer_type(INT128), tsrc ))); + break; + } + } + moved=true; + break; + } + + default: + cbl_internal_error( + "In parser_move(%s to %s), the move of FldLiteralN to %s " + "hasn't been implemented", + sourceref.field->name, + destref.field->name, + cbl_field_type_str(destref.field->type)); + break; + } + } + return moved; + } + +static +tree float_type_of(int n) + { + switch(n) + { + case 4: + return FLOAT; + case 8: + return DOUBLE; + case 16: + return FLOAT128; + default: + gcc_unreachable(); + } + return NULL_TREE; + } + +static tree +float_type_of(cbl_field_t *field) + { + gcc_assert(field->type == FldFloat); + return float_type_of(field->data.capacity); + } + +static tree +float_type_of(cbl_refer_t *refer) + { + return float_type_of(refer->field); + } + +static bool +mh_dest_is_float( cbl_refer_t &destref, + cbl_refer_t &sourceref, + TREEPLET &tsource, + cbl_round_t rounded, + tree size_error) // int + { + bool moved = false; + if( destref.field->type == FldFloat ) + { + Analyze(); + switch( sourceref.field->type ) + { + case FldPointer: + case FldIndex: + case FldNumericBin5: + case FldNumericDisplay: + case FldNumericBinary: + case FldNumericEdited: + case FldPacked: + { + switch( destref.field->data.capacity ) + { + case 4: + gg_call(VOID, + "__gg__float32_from_int128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + build_int_cst_type(INT, rounded), + size_error ? gg_get_address_of(size_error) : null_pointer_node, + NULL_TREE); + break; + case 8: + gg_call(VOID, + "__gg__float64_from_int128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + build_int_cst_type(INT, rounded), + size_error ? gg_get_address_of(size_error) : null_pointer_node, + NULL_TREE); + break; + case 16: + gg_call(VOID, + "__gg__float128_from_int128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + build_int_cst_type(INT, rounded), + size_error ? gg_get_address_of(size_error) : null_pointer_node, + NULL_TREE); + break; + } + moved = true; + break; + } + + case FldFloat: + { + // We are testing for size. First, we need to check to see if the + // source is INFINITY. If so, that's an automatic size error + + IF( gg_call_expr( INT, + "__gg__is_float_infinite", + tsource.pfield, + tsource.offset, + NULL_TREE), + ne_op, + integer_zero_node ) + { + if( size_error ) + { + gg_assign(size_error, integer_one_node ); + } + } + ELSE + { + // The source isn't infinite. + // If the destination is bigger than the source, then we can + // do an untested move: + + if( destref.field->data.capacity >= sourceref.field->data.capacity ) + { + tree dtype = float_type_of(&destref); + tree stype = float_type_of(&sourceref); + + tree tdest = gg_add(member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)); + tree source = gg_add(member(sourceref.field->var_decl_node, "data"), + refer_offset_source(sourceref)); + gg_assign(gg_indirect(gg_cast(build_pointer_type(dtype), tdest)), + gg_cast(dtype, + gg_indirect(gg_cast(build_pointer_type(stype), + source)))); + } + else + { + // There are only three possible moves left: + if(destref.field->data.capacity == 8 ) + { + if( size_error ) + { + gg_assign(size_error, + gg_call_expr( INT, + "__gg__float64_from_128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + NULL_TREE)); + } + else + { + gg_call( INT, + "__gg__float64_from_128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + NULL_TREE); + } + } + else + { + // The destination has to be float32 + if( sourceref.field->data.capacity == 8 ) + { + if( size_error ) + { + gg_assign(size_error, + gg_call_expr( INT, + "__gg__float32_from_64", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + NULL_TREE)); + } + else + { + gg_call( INT, + "__gg__float32_from_64", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + NULL_TREE); + } + + } + else + { + if( size_error ) + { + gg_assign(size_error, + gg_call_expr( INT, + "__gg__float32_from_128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + NULL_TREE)); + } + else + { + gg_call( INT, + "__gg__float32_from_128", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + tsource.pfield, + tsource.offset, + NULL_TREE); + } + } + } + } + } + ENDIF + + moved = true; + break; + } + + case FldLiteralA: + case FldAlphanumeric: + { + // Alphanumeric to float is inherently slow. Send it off to the library + break; + } + + default: + cbl_internal_error("In mh_dest_is_float(%s to %s), the " + "move of %s to %s hasn't been implemented", + sourceref.field->name, + destref.field->name, + cbl_field_type_str(sourceref.field->type), + cbl_field_type_str(destref.field->type)); + break; + } + } + return moved; + } + +static void +picky_memset(tree &dest_p, unsigned char value, size_t length) + { + if( length ) + { + tree dest_ep = gg_define_variable(TREE_TYPE(dest_p)); + gg_assign(dest_ep, + gg_add( dest_p, + build_int_cst_type(SIZE_T, length))); + WHILE( dest_p, lt_op, dest_ep ) + { + gg_assign(gg_indirect(dest_p), + build_int_cst_type(UCHAR, value)); + gg_increment(dest_p); + } + WEND + } + } + +static void +picky_memcpy(tree &dest_p, tree &source_p, size_t length) + { + if( length ) + { + tree dest_ep = gg_define_variable(TREE_TYPE(dest_p)); + gg_assign(dest_ep, + gg_add( dest_p, + build_int_cst_type(SIZE_T, length))); + WHILE( dest_p, lt_op, dest_ep ) + { + gg_assign(gg_indirect(dest_p), gg_indirect(source_p)); + gg_increment(dest_p); + gg_increment(source_p); + } + WEND + } + } + +static bool +mh_numeric_display( cbl_refer_t &destref, + cbl_refer_t &sourceref, + TREEPLET &tsource, + tree size_error) + { + bool moved = false; + + if( destref.field->type == FldNumericDisplay + && sourceref.field->type == FldNumericDisplay + && !(destref.field->attr & scaled_e) + && !(sourceref.field->attr & scaled_e) ) + { + Analyze(); + // I believe that there are 225 pathways through the following code. That's + // because there are five different valid combination of signable_e, + // separate_e, and leading_e. There are three possibilities for + // sender/receiver rdigits (too many, too few, and just right), and the same + // for ldigits. 5 * 5 * 3 * 3 = 225. + + // Fasten your seat belts. + + // In order to simplify processing of a signable internal sender, we are + // going to pick up the sign byte and temporarily turn off the sign bit in + // the source data. At the end, we will restore that value. This + // reflexively makes me a bit nervous (it isn't, for example, thread-safe), + // but it makes life easier. + + static tree source_sign_loc = gg_define_variable(UCHAR_P, "..mhnd_sign_loc", vs_file_static); + static tree source_sign_byte = gg_define_variable(UCHAR, "..mhnd_sign_byte", vs_file_static); + static tree dest_p = gg_define_variable(UCHAR_P, "..mhnd_dest", vs_file_static); // The destination data pointer + static tree source_p = gg_define_variable(UCHAR_P, "..mhnd_source", vs_file_static); // The source data pointer + static tree source_ep = gg_define_variable(UCHAR_P, "..mhnd_source_e", vs_file_static); // When we need an end pointer + + gg_assign(dest_p, qualified_data_dest(destref)); + gg_assign(source_p, gg_add(member(sourceref.field, "data"), + tsource.offset)); + + if( sourceref.field->attr & signable_e ) + { + // The source is signable + + if( !(sourceref.field->attr & leading_e) ) + { + // The sign location is trailing. Whether separate or not, the location + // is the final byte of the data: + gg_assign(source_sign_loc, gg_add(member( sourceref.field->var_decl_node, "data"), + tsource.offset)), + gg_assign(source_sign_loc, + gg_add(source_sign_loc, + build_int_cst_type(SIZE_T, + sourceref.field->data.capacity-1))); + if( (sourceref.field->attr & separate_e) ) + { + // We have trailing separate + } + else + { + // We have trailing internal + } + } + else + { + // The source sign location is in the leading position. + gg_assign(source_sign_loc, + gg_add(member(sourceref.field->var_decl_node, "data"), + tsource.offset)); + if( (sourceref.field->attr & separate_e) ) + { + // We have leading separate, so the first actual digit is at + // source_p+1. + gg_increment(source_p); + } + else + { + // We have leading internal + } + } + // Pick up the byte that contains the sign data, whether internal or + // external: + gg_assign(source_sign_byte, gg_indirect(source_sign_loc)); + + if( !(sourceref.field->attr & separate_e) ) + { + // This is signable and internal, so we want to turn off the sign bit + // in the original source data + if( internal_codeset_is_ebcdic() ) + { + gg_assign(gg_indirect(source_sign_loc), + gg_bitwise_or(source_sign_byte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); + } + else + { + gg_assign(gg_indirect(source_sign_loc), + gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, + ~NUMERIC_DISPLAY_SIGN_BIT))); + } + } + } + else + { + // The number is unsigned, so do nothing. + } + + // Let the shenanigans begin. + + // We are now ready to output the very first byte. + + // The first thing to do is see if we need to output a leading sign + // character + if( (destref.field->attr & signable_e) + && (destref.field->attr & leading_e) + && (destref.field->attr & separate_e) ) + { + // The output is signed, separate, and leading, so the first character + // needs to be either '+' or '-' + if( (sourceref.field->attr & separate_e) ) + { + // The source is signable/separate + // Oooh. Shiny. We already have that character. + gg_assign(gg_indirect(dest_p), source_sign_byte); + } + else + { + // The source is internal. Not that up above we set source_sign_byte + // even for source values that aren't signable + if( internal_codeset_is_ebcdic() ) + { + // We are working in EBCDIC + if( sourceref.field->attr & signable_e ) + { + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + eq_op, + build_int_cst_type( UCHAR, 0) ) + { + // The source was negative + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, EBCDIC_MINUS)); + + } + ELSE + { + // The source was positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, EBCDIC_PLUS)); + } + ENDIF + } + else + { + // The source is not signable, so the result is positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, EBCDIC_PLUS)); + } + } + else + { + // We are working in ASCII + if( sourceref.field->attr & signable_e ) + { + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + ne_op, + build_int_cst_type( UCHAR, 0) ) + { + // The source was negative + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, '-')); + + } + ELSE + { + // The source was positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, '+')); + } + ENDIF + } + else + { + // The source is not signable, so the result is positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, '+')); + } + } + } + gg_increment(dest_p); + } + + // We have the leading '+' or '-', assuming one is needed. We can + // now start outputting the digits to the left of the decimal place + + int dest_ldigits = (int)destref.field->data.digits + - destref.field->data.rdigits; + int source_ldigits = (int)sourceref.field->data.digits + - sourceref.field->data.rdigits; + + int digit_count = 0; + + if( dest_ldigits > source_ldigits ) + { + // The destination has more ldigits than the source, and needs some + // leading zeroes: + picky_memset( dest_p, + internal_codeset_is_ebcdic() ? + EBCDIC_ZERO : '0' , + dest_ldigits - source_ldigits); + // With the leading zeros set, copy over the ldigits: + digit_count = source_ldigits; + } + else if( dest_ldigits == source_ldigits ) + { + // This is the Goldilocks zone. Everything is *just* right. + digit_count = dest_ldigits; + } + else + { + // The destination is smaller than the source. We have to throw away the + // the high-order digits of the source. If any of them are non-zero, then + // we need to indicate a size error. + gg_assign(source_ep, + gg_add( source_p, + build_int_cst_type( SIZE_T, + source_ldigits-dest_ldigits))); + WHILE(source_p, lt_op, source_ep) + { + if( size_error ) + { + IF( gg_indirect(source_p), + ne_op, + build_int_cst_type( UCHAR, + internal_codeset_is_ebcdic() ? + EBCDIC_ZERO : '0') ) + { + set_exception_code(ec_size_truncation_e); + gg_assign(size_error, integer_one_node); + } + ELSE + ENDIF + } + gg_increment(source_p); + } + WEND + + // Having skipped over the leading digits, we are in position to move the + // remaining digits + digit_count = dest_ldigits; + } + + // The ldigits are in place. We now go the very similar exercise for the + // rdigits: + + int dest_rdigits = destref.field->data.rdigits; + int source_rdigits = sourceref.field->data.rdigits; + + int trailing_zeros = 0; + + if( dest_rdigits > source_rdigits ) + { + // The destination has more rdigits than the source + + // Copy over the available digits: + digit_count += source_rdigits; + + // And then tack on the needed trailing zeroes: + trailing_zeros = dest_rdigits - source_rdigits; + } + else if( dest_rdigits == source_rdigits ) + { + // This is the Goldilocks zone. Everything is *just* right. + digit_count += dest_rdigits; + } + else + { + // The destination has fewer rdigits than the source. We send + // over only the necessary rdigits, discarding the ones to the right. + digit_count += dest_rdigits; + } + + picky_memcpy(dest_p, source_p, digit_count); + picky_memset( dest_p, + internal_codeset_is_ebcdic() ? + EBCDIC_ZERO : '0' , + trailing_zeros); + + // With the digits in place, we need to sort out what to do if the target + // is signable: + if( destref.field->attr & signable_e ) + { + if( (destref.field->attr & separate_e) + && !(destref.field->attr & leading_e) ) + { + // The target is separate/trailing, so we need to tack a '+' + // or '-' character + if( sourceref.field->attr & separate_e ) + { + // The source was separate, so we already have what we need in t + // source_sign_byte: + gg_assign(gg_indirect(dest_p), source_sign_byte); + gg_increment(dest_p); + } + else + { + // The source is either internal, or unsigned + if( sourceref.field->attr & signable_e ) + { + // The source is signable/internal, so we need to extract the + // sign bit from source_sign_byte + if( internal_codeset_is_ebcdic() ) + { + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + eq_op, + build_int_cst_type( UCHAR, 0) ) + { + // The source was negative + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, EBCDIC_MINUS)); + + } + ELSE + { + // The source was positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, EBCDIC_PLUS)); + } + ENDIF + } + else + { + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + ne_op, + build_int_cst_type( UCHAR, 0) ) + { + // The source was negative + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, '-')); + + } + ELSE + { + // The source was positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, '+')); + } + ENDIF + } + } + else + { + // The source is unsigned, so dest is positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, + internal_codeset_is_ebcdic() ? + EBCDIC_PLUS : '+' )); + } + } + gg_increment(dest_p); + } + else if( !(destref.field->attr & separate_e) ) + { + // The destination is signed/internal + if( destref.field->attr & leading_e ) + { + // The sign bit goes into the first byte: + gg_assign(dest_p, qualified_data_dest(destref)); + } + else + { + // The sign bit goes into the last byte: + gg_decrement(dest_p); + } + if( sourceref.field->attr & signable_e ) + { + if( sourceref.field->attr & separate_e ) + { + // The source is separate, so source_sign_byte is '+' or '-' + IF( source_sign_byte, + eq_op, + build_int_cst_type(UCHAR, + internal_codeset_is_ebcdic() ? + EBCDIC_MINUS : '-') ) + { + // The source is negative, so turn the ASCII bit on + if( !internal_codeset_is_ebcdic() ) + { + gg_assign(gg_indirect(dest_p), + gg_bitwise_or(gg_indirect(dest_p), + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); + + } + else + { + // It's ebcdic, so turn the sign bit OFF + gg_assign(gg_indirect(dest_p), + gg_bitwise_and(gg_indirect(dest_p), + build_int_cst_type( + UCHAR, + ~NUMERIC_DISPLAY_SIGN_BIT))); + } + } + ELSE + { + // The source is positive, so turn the EBCDIC bit ON: + if( internal_codeset_is_ebcdic() ) + { + gg_assign(gg_indirect(dest_p), + gg_bitwise_or(gg_indirect(dest_p), + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); + } + } + ENDIF + } + else + { + // The source is signable/internal, so the sign bit is in + // source_sign_byte. Whatever it is, it has to go into dest_p: + if( internal_codeset_is_ebcdic() ) + { + // This is EBCDIC, so if the source_sign_byte bit is LOW, we + // clear that bit in dest_p high. + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + eq_op, + build_int_cst_type(UCHAR, 0) ) + { + // The source was negative, so make the dest negative + gg_assign(gg_indirect(dest_p), + gg_bitwise_and(gg_indirect(dest_p), + build_int_cst_type( + UCHAR, + ~NUMERIC_DISPLAY_SIGN_BIT))); + } + ELSE + ENDIF + } + else + { + // This is ASCII, so if the source_sign_byte bit is high, we + // set that bit in dest_p high. + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + ne_op, + build_int_cst_type(UCHAR, 0) ) + { + // The source was negative, so make the dest negative + gg_assign(gg_indirect(dest_p), + gg_bitwise_or(gg_indirect(dest_p), + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); + } + ELSE + ENDIF + } + } + } + } + } + + if( (sourceref.field->attr & signable_e) + && !(sourceref.field->attr & separate_e)) + { + // The source is signable internal, so we need to restore the original + // sign byte in the original source data: + gg_assign(gg_indirect(source_sign_loc), source_sign_byte); + } + moved = true; + } + return moved; + } + +static bool +mh_little_endian( cbl_refer_t &destref, + cbl_refer_t &sourceref, + TREEPLET &tsource, + bool check_for_error, + tree size_error) + { + bool moved = false; + + cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.initial); + + if( !figconst + && !(destref.field->attr & scaled_e) + && !(destref.field->attr & (intermediate_e )) + && !(sourceref.field->attr & (intermediate_e )) + && sourceref.field->type != FldLiteralA + && sourceref.field->type != FldAlphanumeric + && sourceref.field->type != FldNumericEdited + && sourceref.field->type != FldPacked + && ( destref.field->type == FldNumericBin5 + || destref.field->type == FldPointer + || destref.field->type == FldIndex ) ) + { + Analyze(); + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("mh_little_endian") + SHOW_PARSE_END + } + + int bytes_needed = get_bytes_needed(sourceref.field); + tree source_type = tree_type_from_size(bytes_needed, + sourceref.field->attr + & signable_e) ; + tree source = gg_define_variable(source_type); + + if( sourceref.field->type == FldFloat ) + { + get_binary_value_from_float(source, + destref, + sourceref.field, + tsource.offset); + + // Get binary value from float actually scales the source value to the + // dest:: rdigits + copy_little_endian_into_place(destref.field, + refer_offset_dest(destref), + source, + destref.field->data.rdigits, + check_for_error, + size_error); + moved = true; + } + else + { + get_binary_value( source, + NULL, + sourceref.field, + tsource.offset); + copy_little_endian_into_place(destref.field, + refer_offset_dest(destref), + source, + sourceref.field->data.rdigits, + check_for_error, + size_error); + moved = true; + } + } + return moved; + } + +static bool +mh_source_is_group( cbl_refer_t &destref, + cbl_refer_t &sourceref, + TREEPLET &tsrc) + { + bool retval = false; + if( sourceref.field->type == FldGroup && !(destref.field->attr & rjust_e) ) + { + Analyze(); + // We are moving a group to a something. The rule here is just move as + // many bytes as you can, and, if necessary, fill with spaces + tree tdest = gg_add( member(destref.field->var_decl_node, "data"), + refer_offset_dest(destref)); + tree tsource = gg_add( member(sourceref.field->var_decl_node, "data"), + tsrc.offset); + tree dbytes = refer_size_dest(destref); + tree sbytes = tsrc.length; + + IF( sbytes, ge_op, dbytes ) + { + // There are too many source bytes + gg_memcpy(tdest, tsource, dbytes); + } + ELSE + { + // There are too-few source bytes: + gg_memset(tdest, build_int_cst_type(INT, internal_space), dbytes); + gg_memcpy(tdest, tsource, sbytes); + } + ENDIF + retval = true; + } + return retval; + } + +static void +move_helper(tree size_error, // This is an INT + cbl_refer_t destref, + cbl_refer_t sourceref, // Call move_helper with this resolved. + TREEPLET &tsource, + cbl_round_t rounded, + bool check_for_error, // True means our called wants to know about truncation errors + bool restore_on_error + ) + { + Analyze(); + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("move_helper()"); + } + + bool moved = false; + + if( size_error ) + { + gg_assign(size_error, integer_zero_node); + } + + static tree stash = gg_define_variable(UCHAR_P, "..mh_stash", vs_file_static); + + tree st_data = NULL_TREE; + tree st_size = NULL_TREE; + + if( restore_on_error ) + { + // We are creating a copy of the original destination in case we clobber it + // and have to restore it because of a computational error. + bool first_time = true; + static size_t stash_size = 1024; + if( first_time ) + { + first_time = false; + gg_assign(stash, gg_cast(UCHAR_P, gg_malloc(stash_size))); + } + if( stash_size < destref.field->data.capacity ) + { + stash_size = destref.field->data.capacity; + gg_assign(stash, gg_cast(UCHAR_P, gg_realloc(stash, stash_size))); + } + st_data = qualified_data_dest(destref); + st_size = refer_size_dest(destref); + gg_memcpy(stash, + st_data, + st_size); + } + + if( (sourceref.field->attr & (linkage_e | based_e)) + || ( destref.field->attr & (linkage_e | based_e)) ) + { + //goto dont_be_clever; this will go through to the default. + } + + if( !moved ) + { + moved = mh_source_is_group(destref, sourceref, tsource); + } + + if( !moved ) + { + moved = mh_identical(destref, sourceref, tsource); + } + + if( !moved ) + { + moved = mh_source_is_literalN(destref, + sourceref, + check_for_error, + rounded, + size_error); + } + + if( !moved ) + { + moved = mh_dest_is_float( destref, + sourceref, + tsource, + rounded, + size_error); + } + + if( !moved && rounded == truncation_e ) + { + moved = mh_numeric_display( destref, + sourceref, + tsource, + size_error); + } + + if( !moved ) + { + moved = mh_little_endian( destref, + sourceref, + tsource, + restore_on_error, + size_error); + } + + if( !moved && sourceref.field->type == FldLiteralA) + { + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("__gg__move_literala") + } + + cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.initial); + + if( destref.refmod.from + || destref.refmod.len ) + { + // Let the move routine know to treat the destination as alphanumeric + gg_attribute_bit_set(destref.field, refmod_e); + } + + static char *buffer = NULL; + static size_t buffer_size = 0; + size_t source_length = sourceref.field->data.capacity; + + if( buffer_size < source_length ) + { + buffer_size = source_length; + buffer = (char *)xrealloc(buffer, buffer_size); + } + + if( figconst ) + { + char const_char = 0xFF; // Head off a compiler warning about + // // uninitialized variables + switch(figconst) + { + case normal_value_e : + // This is not possible, it says here in the fine print. + abort(); + break; + case low_value_e : + const_char = ascii_to_internal(__gg__low_value_character); + break; + case zero_value_e : + const_char = internal_zero; + break; + case space_value_e : + const_char = internal_space; + break; + case quote_value_e : + const_char = ascii_to_internal(__gg__quote_character); + break; + case high_value_e : + const_char = ascii_to_internal(__gg__high_value_character); + break; + case null_value_e: + const_char = 0x00; + break; + } + memset(buffer, const_char, source_length); + } + else + { + memset( buffer, ascii_space, source_length); + memcpy( buffer, + sourceref.field->data.initial, + std::min(source_length, (size_t)sourceref.field->data.capacity) ); + for( size_t i=0; ivar_decl_node), + refer_offset_dest(destref), + refer_size_dest(destref), + build_int_cst_type(INT, rounded_parameter), + build_string_literal(source_length, + buffer), + build_int_cst_type( SIZE_T, source_length), + NULL_TREE)); + } + else + { + gg_call ( INT, + "__gg__move_literala", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + refer_size_dest(destref), + build_int_cst_type(INT, rounded_parameter), + build_string_literal(source_length, + buffer), + build_int_cst_type( SIZE_T, source_length), + NULL_TREE); + } + if( destref.refmod.from + || destref.refmod.len ) + { + // Return that value to its original form + gg_attribute_bit_clear(destref.field, refmod_e); + } + moved = true; + } + + if( !moved ) + { + SHOW_PARSE1 + { + SHOW_PARSE_INDENT + SHOW_PARSE_TEXT("default __gg__move") + } + + if( destref.refmod.from + || destref.refmod.len + || sourceref.refmod.from + || sourceref.refmod.len ) + { + // Let the move routine know to treat the destination as alphanumeric + gg_attribute_bit_set(destref.field, refmod_e); + } + + int nflags = (sourceref.all ? REFER_T_MOVE_ALL : 0) + + (sourceref.addr_of ? REFER_T_ADDRESS_OF : 0); + + if( size_error ) + { + gg_assign(size_error, + gg_call_expr( INT, + "__gg__move", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + refer_size_dest(destref), + tsource.pfield, + tsource.offset, + tsource.length, + build_int_cst_type(INT, nflags), + build_int_cst_type(INT, rounded), + NULL_TREE)); + } + else + { + gg_call ( INT, + "__gg__move", + gg_get_address_of(destref.field->var_decl_node), + refer_offset_dest(destref), + refer_size_dest(destref), + tsource.pfield, + tsource.offset, + tsource.length, + build_int_cst_type(INT, nflags), + build_int_cst_type(INT, rounded), + NULL_TREE); + + } + if( destref.refmod.from + || destref.refmod.len + || sourceref.refmod.from + || sourceref.refmod.len ) + { + // Return that value to its original form + gg_attribute_bit_clear(destref.field, refmod_e); + } + + moved = true; + } + + if( restore_on_error ) + { + IF(size_error, ne_op, integer_zero_node) + { + gg_memcpy(st_data, + stash, + st_size); + } + ELSE + ENDIF + } + else + { + if( check_for_error ) + { + IF(size_error, ne_op, integer_zero_node) + { + // We had a size error, but there was no restore_on_error. Pointer + // Let our lord and master know there was a truncation: + set_exception_code(ec_size_truncation_e); + } + ELSE + ENDIF + } + } + + SHOW_PARSE1 + { + SHOW_PARSE_END + } + } + +tree parser_cast_long(tree N) + { + return gg_cast(LONG, N); + } + +void +parser_print_long(tree N) + { + gg_printf("%ld", N, NULL_TREE); + } + +void +parser_print_long(const char *fmt, tree N) + { + // fmt should have a %ld/%lx in it + gg_printf(fmt, N, NULL_TREE); + } + +void +parser_print_long(long N) + { + gg_printf("%ld", build_int_cst_type(LONG, N), NULL_TREE); + } + +void +parser_print_long(const char *fmt, long N) + { + // fmt should have a %ld/%lx in it + gg_printf(fmt, build_int_cst_type(LONG, N), NULL_TREE); + } + +void +parser_print_string(const char *ach) + { + gg_printf("%s", gg_string_literal(ach), NULL_TREE); + } + +void +parser_print_string(const char *fmt, const char *ach) + { + // fmt should have a %s in it + gg_printf(fmt, gg_string_literal(ach), NULL_TREE); + } + +char * +binary_initial_from_float128(cbl_field_t *field, int rdigits, _Float128 value) + { + // This routine returns an xmalloced buffer designed to replace the + // data.initial member of the incoming field + char *retval = NULL; + char ach[128] = ""; + + // We need to adjust value so that it has no decimal places + if( rdigits ) + { + value *= get_power_of_ten(rdigits); + } + // We need to make sure that the resulting string will fit into + // a number with 'digits' digits + + // Keep in mind that pure binary types, like BINARY-CHAR, have no digits + if( field->data.digits ) + { + value = fmodf128(value, (_Float128)get_power_of_ten(field->data.digits)); + } + + // We convert it to a integer string of digits: + strfromf128(ach, sizeof(ach), "%.0f", value); + if( strcmp(ach, "-0") == 0 ) + { + // Yes, negative zero can be a thing. Let's make it go away. + strcpy(ach, "0"); + } + + retval = (char *)xmalloc(field->data.capacity); + switch(field->data.capacity) + { + case 1: + *(signed char *)retval = atoi(ach); + break; + case 2: + *(signed short *)retval = atoi(ach); + break; + case 4: + *(signed int *)retval = atoi(ach); + break; + case 8: + *(signed long *)retval = atol(ach); + break; + case 16: + { + __int128 val = 0; + bool negative = false; + for(size_t i=0; idata.digits)); + + // We convert it to a integer string of digits: + strfromf128(ach, sizeof(ach), "%.0f", value); + if( strcmp(ach, "-0") == 0 ) + { + // Yes, negative zero can be a thing. Let's make it go away. + strcpy(ach, "0"); + } + + //fprintf(stderr, "digits_from_float128() %s %f %s ", field->name, (double)value, ach); + + gcc_assert( strlen(ach) <= field->data.digits ); + if( strlen(ach) < width ) + { + memset(retval, '0', width-strlen(ach) ); + } + strcpy(retval + (width-strlen(ach)), ach); + } + +char * +initial_from_float128(cbl_field_t *field, _Float128 value) + { + Analyze(); + // This routine returns an xmalloced buffer that is intended to replace the + // data.initial member of the incoming field. + + //fprintf(stderr, "initial_from_float128 %s\n", field->name); + + char *retval = NULL; + int rdigits; + + // Let's handle the possibility of a figurative constant + cbl_figconst_t figconst = cbl_figconst_of( field->data.initial); + //cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK); + if( figconst ) + { + int const_char = 0xFF; // Head off a compiler warning about uninitialized + // // variables + switch(figconst) + { + case normal_value_e : + // This really should never happen because normal_value_e is zero + abort(); + break; + case low_value_e : + const_char = ascii_to_internal(__gg__low_value_character); + break; + case zero_value_e : + const_char = internal_zero; + break; + case space_value_e : + const_char = internal_space; + break; + case quote_value_e : + const_char = ascii_to_internal(__gg__quote_character); + break; + case high_value_e : + if( __gg__high_value_character == DEGENERATE_HIGH_VALUE ) + { + const_char = __gg__high_value_character; + } + else + { + const_char = ascii_to_internal(__gg__high_value_character); + } + break; + case null_value_e: + break; + } + bool set_return = figconst != zero_value_e; + if( !set_return ) + { + // The figconst is zero + switch(field->type) + { + case FldGroup: + case FldAlphanumeric: + set_return = true; + break; + + default: + break; + } + } + if( set_return ) + { + retval = (char *)xmalloc(field->data.capacity); + memset(retval, const_char, field->data.capacity); + goto done; + } + } + + // There is always the infuriating possibility of a P-scaled number + if( field->attr & scaled_e ) + { + rdigits = 0; + if( field->data.rdigits >= 0 ) + { + // Suppose our PIC is PPPPPP999, meaning that field->digits + // is 3, and field->rdigits is 6. + + // Our result has no decimal places, and we have to multiply the value + // by 10**9 to get the significant bdigits where they belong. + + value *= get_power_of_ten(field->data.digits + field->data.rdigits); + } + else + { + // Suppose our target is 999PPPPPP, so there is a ->digits + // of 3 and field->rdigits of -6. + + // If our caller gave us 123000000, we need to divide + // it by 1000000 to line up the 123 with where we want it to go: + + value /= get_power_of_ten(-field->data.rdigits); + } + // Either way, we now have everything aligned for the remainder of the + // processing to work: + } + else + { + // Not P-scaled + rdigits = field->data.rdigits; + } + + switch(field->type) + { + case FldNumericBin5: + case FldIndex: + retval = binary_initial_from_float128(field, rdigits, value); + break; + + case FldNumericBinary: + { + retval = binary_initial_from_float128(field, rdigits, value); + size_t left = 0; + size_t right = field->data.capacity - 1; + while(left < right) + { + std::swap(retval[left++], retval[right--]); + } + break; + } + + case FldNumericDisplay: + { + retval = (char *)xmalloc(field->data.capacity); + char *pretval = retval; + char ach[128]; + + bool negative; + if( value < 0 ) + { + negative = true; + value = -value; + } + else + { + negative = false; + } + + digits_from_float128(ach, field, field->data.digits, rdigits, value); + + char *digits = ach; + if( (field->attr & signable_e) + && (field->attr & separate_e) + && (field->attr & leading_e ) ) + { + if( negative ) + { + *pretval++ = internal_minus; + } + else + { + *pretval++ = internal_plus; + } + } + for(size_t i=0; idata.digits; i++) + { + *pretval++ = internal_zero + ((*digits++) & 0x0F); + } + if( (field->attr & signable_e) + && (field->attr & separate_e) + && !(field->attr & leading_e ) ) + { + if( negative ) + { + *pretval++ = internal_minus; + } + else + { + *pretval++ = internal_plus; + } + } + if( (field->attr & signable_e) + && !(field->attr & separate_e) + && negative) + { + if( field->attr & leading_e ) + { + if( internal_is_ebcdic ) + { + retval[0] &= ~NUMERIC_DISPLAY_SIGN_BIT; + } + else + { + retval[0] |= NUMERIC_DISPLAY_SIGN_BIT; + } + } + else + { + if( internal_is_ebcdic ) + { + pretval[-1] &= ~NUMERIC_DISPLAY_SIGN_BIT; + } + else + { + pretval[-1] |= NUMERIC_DISPLAY_SIGN_BIT; + } + } + } + break; + } + + case FldPacked: + { + retval = (char *)xmalloc(field->data.capacity); + char *pretval = retval; + char ach[128]; + + bool negative; + if( value < 0 ) + { + negative = true; + value = -value; + } + else + { + negative = false; + } + + // For COMP-6 (flagged by separate_e), the number of required digits is + // twice the capacity. + + // For COMP-3, the number of digits is 2*capacity minus 1, because the + // the final "digit" is a sign nybble. + + size_t ndigits = (field->attr & separate_e) + ? field->data.capacity * 2 + : field->data.capacity * 2 - 1; + digits_from_float128(ach, field, ndigits, rdigits, value); + + char *digits = ach; + for(size_t i=0; iattr & separate_e) ) + { + // This is COMP-3, so put in a sign nybble + if( (field->attr & signable_e) ) + { + if( negative ) + { + *pretval++ += 0x0D; // Means signable and negative + } + else + { + *pretval++ += 0x0C; // Means signable and non-negative + } + } + else + { + *pretval++ += 0x0F; // Means not signable + } + } + break; + } + + case FldGroup: + case FldAlphanumeric: + case FldLiteralA: + case FldAlphaEdited: + { + if( field->data.initial ) + { + retval = (char *)xmalloc(field->data.capacity+1); + if( field->attr & hex_encoded_e) + { + memcpy(retval, field->data.initial, field->data.capacity); + } + else + { + size_t buffer_size = 0; + size_t length = (size_t)field->data.capacity; + memset(retval, internal_space, length); + raw_to_internal(&retval, &buffer_size, field->data.initial, length); + if( strlen(field->data.initial) < length ) + { + // If this is true, then the initial string must've been Z'xyz' + retval[strlen(field->data.initial)] = '\0'; + } + } + retval[field->data.capacity] = '\0'; + } + break; + } + + case FldNumericEdited: + { + retval = (char *)xmalloc(field->data.capacity+1); + if( field->data.initial && field->attr & quoted_e ) + { + if( field->attr & quoted_e ) + { + // What the programmer says the value is, the value becomes, no + // matter how wrong it might be. + size_t length = std::min( (size_t)field->data.capacity, + strlen(field->data.initial)); + for(size_t i=0; idata.initial[i]); + } + if( length < (size_t)field->data.capacity ) + { + memset( retval+length, + internal_space, + (size_t)field->data.capacity - length); + } + } + } + else + { + // It's not a quoted string, so we use data.value: + bool negative; + if( value < 0 ) + { + negative = true; + value = -value; + } + else + { + negative = false; + } + + char ach[128]; + memset(ach, 0, sizeof(ach)); + memset(retval, 0, field->data.capacity); + size_t ndigits = field->data.capacity; + + if( (field->attr & blank_zero_e) && value == 0 ) + { + memset(retval, internal_space, field->data.capacity); + } + else + { + digits_from_float128(ach, field, ndigits, rdigits, value); + __gg__string_to_numeric_edited( retval, + ach, + field->data.rdigits, + negative, + field->data.picture); + } + } + break; + } + + case FldFloat: + { + retval = (char *)xmalloc(field->data.capacity); + switch( field->data.capacity ) + { + case 4: + *(_Float32 *)retval = (_Float32) value; + break; + case 8: + *(_Float64 *)retval = (_Float64) value; + break; + case 16: + *(_Float128 *)retval = (_Float128) value; + break; + } + break; + } + + case FldLiteralN: + { + break; + } + + default: + break; + } + done: + return retval; + } + +static void +actually_create_the_static_field( cbl_field_t *new_var, + tree data_area, + size_t length_of_initial_string, + const char *new_initial, + tree immediate_parent, + tree new_var_decl) + { + tree constr = make_node(CONSTRUCTOR); + TREE_TYPE(constr) = cblc_field_type_node; + TREE_STATIC(constr) = 1; + TREE_CONSTANT(constr) = 1; + + tree next_field = TYPE_FIELDS(cblc_field_type_node); + // We are going to create the constructors by walking the linked + // list of FIELD_DECLs. We must do it in the same order as the + // structure creation code in create_cblc_field_t() + + // UCHAR_P, "data", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + data_area ); + next_field = TREE_CHAIN(next_field); + + // SIZE_T, "capacity", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type( SIZE_T, + new_var->data.capacity) ); + next_field = TREE_CHAIN(next_field); + + // SIZE_T, "allocated", + if( data_area != null_pointer_node ) + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type( SIZE_T, + new_var->data.capacity) ); + } + else + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type( SIZE_T, + 0) ); + } + + next_field = TREE_CHAIN(next_field); + + // SIZE_T, "offset", + + if( new_var->type == FldAlphanumeric && + new_var->attr & intermediate_e ) + { + // This is in support of FUNCTION TRIM. That function can make the capacity + // of the intermediate target smaller so that TRIM("abc ") returns + // "abc". By putting the capacity here for such variables, we have a + // mechanism for restoring it the capacity to the original. + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SIZE_T, new_var->data.capacity)); + } + else + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SIZE_T, new_var->offset) ); + } + + next_field = TREE_CHAIN(next_field); + + // CHAR_P, "name", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + gg_string_literal(new_var->name) ); + next_field = TREE_CHAIN(next_field); + + // CHAR_P, "picture", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + gg_string_literal(new_var->data.picture) ); + next_field = TREE_CHAIN(next_field); + + // CHAR_P, "initial", + if( length_of_initial_string == 0 ) + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + null_pointer_node ); + } + else + { + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_string_literal(length_of_initial_string, new_initial) ); + } + next_field = TREE_CHAIN(next_field); + + // CHAR_P, "parent", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + immediate_parent ? gg_get_address_of(immediate_parent) : null_pointer_node ); + next_field = TREE_CHAIN(next_field); + + // SIZE_T, "occurs_lower", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SIZE_T, new_var->occurs.bounds.lower) ); + next_field = TREE_CHAIN(next_field); + + // SIZE_T, "occurs_upper"); + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SIZE_T, new_var->occurs.bounds.upper) ); + next_field = TREE_CHAIN(next_field); + + // SIZE_T, "attr", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SIZE_T, new_var->attr) ); + next_field = TREE_CHAIN(next_field); + + // SCHAR, "type", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SCHAR, new_var->type) ); + next_field = TREE_CHAIN(next_field); + + // SCHAR, "level", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SCHAR, new_var->level) ); + next_field = TREE_CHAIN(next_field); + + // SCHAR, "digits", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SCHAR, new_var->data.digits) ); + next_field = TREE_CHAIN(next_field); + + // SCHAR, "rdigits", + CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr), + next_field, + build_int_cst_type(SCHAR, new_var->data.rdigits) ); + next_field = TREE_CHAIN(next_field); + + DECL_INITIAL(new_var_decl) = constr; + } + +static void +psa_global(cbl_field_t *new_var) + { + char *mname = cobol_name_mangler(new_var->name); + char ach[2*sizeof(cbl_name_t)]; + sprintf(ach, "__gg__%s", mname); + free(mname); + + if( getenv("SHOW_GLOBAL_VARIABLES") ) + { + char ach_type[32]; + strcpy(ach_type, cbl_field_type_str(new_var->type)); + + fprintf(stderr, "struct cblc_field_t %s = {\n", ach); + fprintf(stderr, " .data = NULL ,\n" ); + fprintf(stderr, " .capacity = %d ,\n", new_var->data.capacity ); + fprintf(stderr, " .offset = %ld ,\n" , new_var->offset ); + fprintf(stderr, " .name = \"%s\" ,\n" , new_var->name ); + fprintf(stderr, " .picture = \"%s\" ,\n" , new_var->data.picture ? new_var->data.picture : "" ); + if( new_var->data.initial || new_var->type == FldPointer ) + { + fprintf(stderr, " .initial = \"%s\" ,\n" , new_var->data.picture ? new_var->data.picture : "" ); + } + else + { + fprintf(stderr, " .initial = NULL ,\n" ); + } + fprintf(stderr, " .parent = NULL,\n" ); + fprintf(stderr, " .depending_on = NULL ,\n" ); + fprintf(stderr, " .depends_on = NULL ,\n" ); + fprintf(stderr, " .occurs_lower = 0 ,\n" ); + fprintf(stderr, " .occurs_upper = 0 ,\n" ); + fprintf(stderr, " .attr = 0x%lx ,\n" , new_var->attr ); + fprintf(stderr, " .type = %s ,\n" , ach_type); + fprintf(stderr, " .level = %d ,\n" , new_var->level ); + fprintf(stderr, " .digits = %d ,\n" , new_var->data.digits ); + fprintf(stderr, " .rdigits = %d ,\n" , new_var->data.rdigits ); + fprintf(stderr, " };\n"); + } + + if( strcmp(new_var->name, "_VERY_TRUE") == 0 ) + { + new_var->var_decl_node = boolean_true_node; + return; + } + if( strcmp(new_var->name, "_VERY_FALSE") == 0 ) + { + new_var->var_decl_node = boolean_false_node; + return; + } + + // global variables already have a cblc_field_t defined in constants.cc + + strcpy(ach, "__gg__"); + strcat(ach, new_var->name); + for(size_t i=0; iname, "RETURN-CODE") == 0 ) + { + strcpy(ach, "__gg___11_return_code6"); + } + + if( strcmp(new_var->name, "UPSI-0") == 0 ) + { + strcpy(ach, "__gg___6_upsi_04"); + } + + new_var->var_decl_node = gg_declare_variable(cblc_field_type_node, ach, NULL, vs_external_reference); + + // global variables already have a .data area defined. We can find that + // variable from the new_var->name. It's lower-case, with hyphens + // converted to underscores + strcpy(ach, "__gg__data_"); + strcat(ach, new_var->name); + for(size_t i=0; idata_decl_node = gg_declare_variable(UCHAR, ach, NULL, vs_external_reference); + } + +static tree +psa_new_var_decl(cbl_field_t *new_var, const char *external_record_base) + { + // This routine creates the VAR_DECL for the cblc_field_t that we are about + // to statically create. + tree new_var_decl; + + if( *external_record_base ) + { + char ach[257]; + strcpy(ach, "_"); + strcat(ach, external_record_base); + strcat(ach, "_ra"); // For "Record Area" + new_var_decl = gg_define_variable( cblc_field_type_node, + ach, + vs_external); + SET_DECL_MODE(new_var_decl, BLKmode); + } + else + { + size_t our_index = new_var->our_index; + + // During the early stages of implementing cbl_field_t::our_index, there + // were execution paths in parse.y and parser.cc that resulted in our_index + // not being set. I hereby try to use field_index() to find the index + // of this field to resolve those. I note that field_index does a linear + // search of the symbols[] table to find that index. That's why I don't + // use it routinely; it results in O(N^squared) computational complexity + // to do a linear search of the symbol table for each symbol + + if( !our_index + && new_var->type != FldLiteralN + && !(new_var->attr & intermediate_e)) + { + our_index = field_index(new_var); + if( our_index == (size_t)-1 ) + { + // Hmm. Couldn't find it. Seems odd. + our_index = 0; + } + } + + char base_name[257]; + char id_string[32] = ""; + + if( new_var->attr & external_e ) + { + // For external variables, just stick with the original name + sprintf(base_name, "%s_cblc_field", new_var->name); + } + else + { + if( our_index + && new_var->parent + && symbol_at(new_var->parent)->type == SymField ) + { + // We have a parent that is a field + sprintf(id_string, ".%ld_%ld", our_index, new_var->parent); + } + else + { + // The parent is zero, so it'll be implied: + sprintf(id_string, ".%ld", our_index); + } + + if(strcasecmp(new_var->name, "filler") == 0) + { + // Multiple "fillers" can have the same parent, so we use filler_count + // to distinguish them. We also prepend an underscore, so that + // the user can't trip us up by creating their *own* cobol variable + // named "FILLER-1" + static int filler_count = 1; + sprintf(base_name, "_filler_%d", filler_count++); + } + else if( strlen(new_var->name) == 0 ) + { + // This can happen. + static int empty_count = 1; + sprintf(base_name, + "_%s_%d", + cbl_field_type_str(new_var->type), + empty_count++); + } + else if( new_var->attr & intermediate_e ) + { + static int inter_count = 1; + sprintf(base_name, + "_%s_%s_%d", + "intermediate", + new_var->name, + inter_count++); + } + else + { + strcpy(base_name, new_var->name); + } + strcat(base_name, id_string); + } + + if( new_var->attr & external_e ) + { + //fprintf(stderr, "external_e base name is %s\n", base_name); + new_var_decl = gg_define_variable( cblc_field_type_node, + base_name, + vs_external); + SET_DECL_MODE(new_var_decl, BLKmode); + } + else if( new_var->attr & (intermediate_e) + && new_var->type != FldLiteralA + && new_var->type != FldLiteralN ) + { +// new_var_decl = gg_define_variable( cblc_field_type_node, +// base_name, +// vs_static); + new_var_decl = gg_define_variable( cblc_field_type_node, + base_name, + vs_stack); + SET_DECL_MODE(new_var_decl, BLKmode); + } + else + { + new_var_decl = gg_define_variable( cblc_field_type_node, + base_name, + vs_static); + SET_DECL_MODE(new_var_decl, BLKmode); + } + } + return new_var_decl; + } + +#if 1 +static void +psa_FldLiteralA(struct cbl_field_t *field ) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", field) + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + // We are constructing a completely static constant structure. We know the + // capacity. We'll create it from the data.initial. The cblc_field_t:data + // will be an ASCII/EBCDIC copy of the .initial data. The .initial will be + // left as ASCII. The var_decl_node will be an ordinary cblc_field_t, which + // means that at this point in time, a FldLiteralA can be used anywhere a + // FldGroup or FldAlphanumeric can be used. We are counting on the parser + // not allowing a FldLiteralA to be a left-hand-side variable. + + // First make room + static size_t buffer_size = 1024; + static char *buffer = (char *)xmalloc(buffer_size); + if( buffer_size < field->data.capacity+1 ) + { + buffer_size = field->data.capacity+1; + buffer = (char *)xrealloc(buffer, buffer_size); + } + + cbl_figconst_t figconst = cbl_figconst_of( field->data.initial ); + gcc_assert(figconst == normal_value_e); + + if( internal_codeset_is_ebcdic() ) + { + for( size_t i=0; idata.capacity; i++ ) + { + buffer[i] = ascii_to_internal(field->data.initial[i]); + } + } + else + { + memcpy(buffer, field->data.initial, field->data.capacity); + } + buffer[field->data.capacity] = '\0'; + + // We have the original nul-terminated text at data.initial. We have a + // copy of it in buffer[] in the internal codeset. + + // We will reuse a single static structure for each string + static std::unordered_map seen_before; + std::string field_string(buffer); + std::unordered_map::const_iterator it = + seen_before.find(field_string); + + static const char name_base[] = "_literal_a_"; + + if( it != seen_before.end() ) + { + // We've seen that string before. + int nvar = it->second; + char ach[32]; + sprintf(ach, "%s%d", name_base, nvar); + field->var_decl_node = gg_declare_variable(cblc_field_type_node, + ach, + NULL, + vs_file_static); + } + else + { + // We have not seen that string before + static int nvar = 1; + seen_before[field_string] = nvar; + + char ach[32]; + sprintf(ach, "%s%d", name_base, nvar); + field->var_decl_node = gg_define_variable( cblc_field_type_node, + ach, + vs_file_static); + actually_create_the_static_field( + field, + build_string_literal(field->data.capacity+1, + buffer), + field->data.capacity+1, + field->data.initial, + NULL_TREE, + field->var_decl_node); + nvar += 1; + } + TRACE1 + { + TRACE1_INDENT + TRACE1_TEXT("Finished") + TRACE1_END + } + } +#endif + +void +parser_local_add(struct cbl_field_t *new_var ) + { + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_FIELD(" ", new_var); + SHOW_PARSE_END + } + + IF( member(new_var->var_decl_node, "data"), + ne_op, + gg_cast(UCHAR_P, null_pointer_node) ) + { + gg_call(VOID, + "__gg__push_local_variable", + gg_get_address_of(new_var->var_decl_node), + NULL_TREE); + } + ELSE + ENDIF + + if( new_var->level == LEVEL01 || new_var->level == LEVEL77) + { + // We need to allocate memory on the stack for this variable + tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity); + tree data_decl_node = gg_define_variable( array_type, + NULL, + vs_stack); + gg_assign( member(new_var->var_decl_node, "data"), + gg_get_address_of(data_decl_node) ); + } + cbl_refer_t wrapper; + wrapper.field = new_var; + initialize_variable_internal(wrapper); + } + +void +parser_field_attr_set( cbl_field_t *tgt, cbl_field_attr_t attr, bool on_off ) + { + if( on_off ) + { + gg_assign(member(tgt, "attr"), + gg_bitwise_or(member(tgt, "attr"), + build_int_cst_type(SIZE_T, attr))); + } + else + { + gg_assign(member(tgt, "attr"), + gg_bitwise_and(member(tgt, "attr"), + build_int_cst_type(SIZE_T, ~attr))); + } + } + +void +parser_symbol_add(struct cbl_field_t *new_var ) + { + Analyze(); + SHOW_PARSE + { + do + { + fprintf(stderr, "( %d ) %s():", CURRENT_LINE_NUMBER, __func__); + } + while(0); + + fprintf(stderr, " %2.2d %s<%s> off:%zd " + "msiz:%d cap:%d dig:%d rdig:%d attr:0x%lx loc:%p", + new_var->level, + new_var->name, + cbl_field_type_str(new_var->type), + new_var->offset, + new_var->data.memsize, + new_var->data.capacity, + new_var->data.digits, + new_var->data.rdigits, + new_var->attr, + new_var); + + if( is_table(new_var) ) + { + fprintf(stderr," OCCURS:%zd", new_var->occurs.ntimes()); + } + cbl_field_t *parent = parent_of(new_var); + if( parent ) + { + fprintf(stderr, + " parent:(%zd)%s", + new_var->parent, + parent->name); + } + else + { + // Parent isn't a field + size_t parent_index = new_var->parent; + if( parent_index ) + { + symbol_elem_t *e = symbol_at(parent_index); + if( e->type == SymFile ) + { + fprintf(stderr, + " parent_file:(%zd)%s", + new_var->parent, + e->elem.file.name); + if( e->elem.file.attr & external_e ) + { + fprintf(stderr, " (flagged external)"); + } + } + } + } + + if( symbol_redefines(new_var) ) + { + fprintf(stderr, + " redefines:(%p)%s", + symbol_redefines(new_var), + symbol_redefines(new_var)->name); + } + + SHOW_PARSE_END + } + + if( new_var->level == 1 && new_var->occurs.bounds.upper ) + { + if( new_var->data.memsize < new_var->data.capacity * new_var->occurs.bounds.upper ) + { + cbl_internal_error("LEVEL 01 (%s) OCCURS " + "has insufficient data.memsize", new_var->name); + } + } + + if( new_var->var_decl_node ) + { + if( new_var->type != FldConditional ) + { + // There is a possibility when re-using variables that a temporary that + // was created at compile time might not have a data pointer at run time. + if( new_var->attr & (intermediate_e) ) + { + IF( member(new_var->var_decl_node, "allocated"), + lt_op, + member(new_var->var_decl_node, "capacity") ) + { + gg_free(member(new_var, "data")); + gg_assign(member(new_var, "data"), + gg_cast(UCHAR_P, gg_malloc(new_var->data.capacity))); + gg_assign(member(new_var, "allocated"), + build_int_cst_type(SIZE_T, new_var->data.capacity)); + } + ELSE + { + } + ENDIF + } + } + else + { + gg_assign(new_var->var_decl_node, boolean_false_node); + } + + goto done; + } + + if( !(new_var->attr & initialized_e) ) + { + cbl_field_type_t incoming_type = new_var->type; + + if( is_register_field(new_var) ) + { + psa_global(new_var); + goto done; + } + + if( new_var->type == FldBlob ) + { + psa_FldBlob(new_var); + goto done; + } + + if( new_var->type == FldLiteralA ) + { + new_var->data.picture = ""; + psa_FldLiteralA(new_var); + goto done; + } + + size_t length_of_initial_string = 0; + const char *new_initial = NULL; + + // gg_printf("parser_symbol_add %s\n", build_string_literal( strlen(new_var->name)+1, new_var->name), NULL_TREE); + + // If we are dealing with an alphanumeric, and it is not hex_encoded, we + // want to convert to single-byte-encoding (if it happens to be UTF-8) and + // to EBCDIC, if EBCDIC is in force: + + // Make sure we have a new variable to work with. + if( !new_var ) + { + cbl_internal_error("parser_symbol_add() was called with a NULL new_var\n"); + } + + TRACE1 + { + TRACE1_HEADER + if( new_var->level ) + { + gg_fprintf( trace_handle, + 1, + "%2.2d ", + build_int_cst_type(INT, new_var->level)); + } + TRACE1_TEXT(new_var->name) + TRACE1_TEXT_ABC(" (", cbl_field_type_str(new_var->type) ,")") + if( new_var->type == FldLiteralN) + { + gg_fprintf( trace_handle, + 1, " [%ld]", + build_int_cst_type(LONG, + *(const long *)new_var->data.initial)); + } + TRACE1_END + } + + if( is_table(new_var) && new_var->data.capacity == 0) + { + cbl_internal_error( + "%s(): %2.2d %s is a table, but it improperly has a capacity of zero", + __func__, + new_var->level, + new_var->name); + } + + cbl_field_t *ancestor = NULL; + tree immediate_parent = NULL_TREE; + + if( new_var->parent > 0 ) + { + symbol_elem_t *parent = symbol_at(new_var->parent); + gcc_assert(parent); + if( parent->type == SymField ) + { + ancestor = cbl_field_of(parent); + immediate_parent = ancestor->var_decl_node; + } + } + + if( ancestor == NULL ) + { + // This is a last ditch effort for handling SAME AREA. Although + // symbol_redefines should work for REDEFINES, LEVEL66, and SAME AREA, I + // decided to leave the existing code alone and added this in when SAME AREA + // was added in. + ancestor = symbol_redefines(new_var); + if( ancestor ) + { + immediate_parent = ancestor->var_decl_node; + + // This obscure test was put in to find problems caused by SAME AREA, + // which at one point would cause a parent to be erroneously seen after + // the child. + assert(ancestor->our_index < new_var->our_index); + } + } + + if( ancestor == new_var ) + { + cbl_internal_error("parser_symbol_add(): %s is its own ancestor", + new_var->name); + } + + if( !ancestor && (new_var->level > LEVEL01 && new_var->level <= LEVEL49 ) ) + { + cbl_internal_error("parser_symbol_add(): %2.2d %s has null ancestor", + new_var->level, + new_var->name); + } + + // new_var's var_decl_node should be NULL at this point + if( new_var->var_decl_node ) + { + cbl_internal_error( "parser_symbol_add( %s ) improperly has a non-null " + "var_decl_node\n", + new_var->name); + } + + switch( new_var->type ) + { + static int counter=1; + char ach[2*sizeof(cbl_name_t)]; + case FldConditional: + // FldConditional corresponds to a C "bool". But we don't carry + // a runtime copy of a structure for the variable; instead, + // var_decl_node becomes a boolean_type_node that is used directly. + sprintf(ach, "_%sconditional_%d", new_var->name, counter++); + new_var->var_decl_node = gg_define_variable(BOOL, ach, vs_static); + goto done; + break; + + default: + break; + } + + if( new_var->type == FldNumericBinary + || new_var->type == FldNumericBin5 ) + { + switch( new_var->data.capacity ) + { + case 1: + case 2: + case 4: + case 8: + case 16: + break; + default: + fprintf(stderr, + "%s is type %s and has capacity %u\n", + new_var->name, + cbl_field_type_str(new_var->type), + new_var->data.capacity); + gcc_unreachable(); + break; + } + } + + size_t level_88_string_size = 0; + char *level_88_string = NULL; + if( ancestor ) + { + level_88_string = get_level_88_domain(ancestor->data.capacity, new_var, level_88_string_size); + } + + if( !new_var->data.picture ) + { + // When picture is NULL, we have to keep testing for NULLness at runtime + // Force it to be a zero-length string here, so that we don't need to + // worry about it. + new_var->data.picture = ""; + } + + if( new_var->type == FldNumericEdited && (new_var->attr & scaled_e) ) + { + char *pic = xstrdup(new_var->data.picture); // duplicate the const char * + remove_p_from_picture(pic); + new_var->data.picture = pic; + } + + if( new_var->type == FldClass && new_var->level != 88 ) + { + new_var->data.initial = get_class_condition_string(new_var); + } + + if( new_var->type == FldLiteralA ) + { + length_of_initial_string = new_var->data.capacity; + } + else if( new_var->data.initial && new_var->data.initial[0] != '\0' ) + { + if( new_var->type == FldClass ) + { + length_of_initial_string = strlen(new_var->data.initial)+1; + } + else if( new_var->type == FldNumericDisplay ) + { + length_of_initial_string = strlen(new_var->data.initial)+1; + } + else + { + // This is an ordinary string + // fprintf(stderr, ">>>>>>> parser_symbol_add %s %s \n", cbl_field_type_str(new_var->type), new_var->name); + // fprintf(stderr, " %d %d\n", (int)strlen(new_var->data.initial), (int)new_var->data.capacity); + //length_of_initial_string = strlen(new_var->data.initial) + 1; + length_of_initial_string = new_var->data.capacity + 1; + } + } + else + { + // We have something that doesn't have a data.initial pointer + length_of_initial_string = 0; + } + + // GDB needs to know the data hierarchy. We do that by including our_index + // and parent index in the variable name: + + size_t our_index = new_var->our_index; + + // During the early stages of implementing cbl_field_t::our_index, there + // were execution paths in parse.y and parser.cc that resulted in our_index + // not being set. I hereby try to use field_index() to find the index + // of this field to resolve those. I note that field_index does a linear + // search of the symbols[] table to find that index. That's why I don't + // use it routinely; it results in O(N^squared) computational complexity + // to do a linear search of the symbol table for each symbol + + if( !our_index + && new_var->type != FldLiteralN + && !(new_var->attr & intermediate_e)) + { + our_index = field_index(new_var); + if( our_index == (size_t)-1 ) + { + // Hmm. Couldn't find it. Seems odd. + our_index = 0; + } + } + + // When we create the cblc_field_t structure, we need a data pointer + // for "data". In the case of a variable that has no parent, we + // have to allocate storage. In the case of a variable that has a parent, + // we calculate data as the pointer to our parent's data plus our + // offset. + + // declare and define the structure. This code *must* match + // the C structure declared in libgcobol.c. Towards that end, the + // variables are declared in descending order of size in order to + // make the packing match up. + + // This uses a single structure type_decl template for creating each structure + + char external_record_base[2*sizeof(cbl_name_t)] = ""; + + if( new_var->parent > 0 ) + { + symbol_elem_t *parent = symbol_at(new_var->parent); + gcc_assert(parent); + if( parent->type == SymField ) + { + ancestor = cbl_field_of(parent); + immediate_parent = ancestor->var_decl_node; + } + else if( parent->type == SymFile ) + { + if( parent->elem.file.attr & external_e ) + { + // The parent of new_var is a SymFile with the external_e attribute + // Therefore, we have to establish new_var as an external with a + // predictable name + strcpy(external_record_base, parent->elem.file.name); + } + } + } + + tree new_var_decl = psa_new_var_decl(new_var, external_record_base); + + if( new_var->type == FldNumericEdited ) + { + // Decide if a NumericEdited can hold negative numbers: + size_t len = strlen( new_var->data.picture); + + new_var->attr &= ~signable_e; + if( strchr(new_var->data.picture, '+') ) + { + new_var->attr |= signable_e; + } + else if( strchr(new_var->data.picture, '-') ) + { + new_var->attr |= signable_e; + } + else if( len > 2 ) + { + char ch1 = _toupper(new_var->data.picture[len-2]); + char ch2 = _toupper(new_var->data.picture[len-1]); + if( (ch1 == 'D' && ch2 == 'B') + || (ch1 == 'C' && ch2 == 'R') ) + { + new_var->attr |= signable_e; + } + } + } + + /* + * Burn after reading. (Delete comment after implementing.) + * + * As of Tue Apr 4 10:29:35 2023, we support 01 CONSTANT numeric values as follows: + * 1. FldNumericBin5 + * 2. always constant_e, also potentially global_e + * 3. compile-time value in cbl_field_data_t::value + * 4. cbl_field_data_t::capacity is 0 because it requires no working storage + */ + + if( new_var->data.capacity == 0 + && new_var->level != 88 + && new_var->type != FldClass + && new_var->type != FldLiteralN + && new_var->type != FldLiteralA ) + { + cbl_internal_error( "%s(): %2.2d %s<%s> improperly has a data.capacity of zero", + __func__, + new_var->level, + new_var->name, + cbl_field_type_str(new_var->type)); + } + + new_var->var_decl_node = new_var_decl; + + if( level_88_string ) + { + new_var->data.initial = level_88_string; + length_of_initial_string = level_88_string_size; + } + + tree data_area = null_pointer_node; + + if( *external_record_base ) + { + char achDataName[256]; + if( *external_record_base ) + { + sprintf(achDataName, "__%s_vardata", external_record_base); + } + tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity); + new_var->data_decl_node = gg_define_variable( + array_type, + achDataName, + vs_external); + data_area = gg_get_address_of(new_var->data_decl_node); + goto actual_allocate; + } + + if( ancestor && new_var->level != 0 ) + { + // This variable has an ancestor, so we share its already-allocated data + // area + new_var->data_decl_node = ancestor->data_decl_node; + } + else + { + // We have no ancestor, so data_decl_node must be allocated. Note that + // LEVEL00 variables might have ancestors (INDEXED BY variables, for + // example), but they need data allocated. + + if( new_var->type == FldLiteralN ) + { + // A numeric literal gets special handling: + psa_FldLiteralN(new_var); + data_area = gg_get_address_of(new_var->data_decl_node); + } + else + { + // Create a static array of UCHAR, and make that the data_decl_node + // size_t bytes_to_allocate = new_var->data.memsize ? + // new_var->data.memsize : new_var->data.capacity; + size_t bytes_to_allocate = std::max(new_var->data.memsize, + new_var->data.capacity); + + // A FldClass actually doesn't need any bytes, because the only important + // thing about it is the .initial field. We will allocate a single byte, + // just to keep run-time pointers from being NULL + if( (new_var->type == FldClass && bytes_to_allocate == 0) + || (new_var->type == FldLiteralA && bytes_to_allocate == 0) ) + { + bytes_to_allocate = 1; + } + + if( !bytes_to_allocate ) + { + fprintf(stderr, + "bytes_to_allocate is zero for %s (symbol number %ld)\n", + new_var->name, + new_var->our_index); + gcc_assert(bytes_to_allocate); + } + + if( new_var->type == FldIndex && new_var->level == 0 ) + { + // Do nothing, because the OCCURS INDEXED BY variable needs data + // allocated. This leaves bytes_to_allcate at its value. + } + else + { + if( new_var->attr & based_e + || new_var->attr & linkage_e + || new_var->attr & local_e ) + { + // BASED variables get their data through ALLOCATE or SET + // LINKAGE variables get their data from the caller + // LOCAL variables get their data dynamically. + bytes_to_allocate = 0; + } + } + + if( bytes_to_allocate ) + { + if( new_var->attr & (intermediate_e) + && new_var->type != FldLiteralN + && new_var->type != FldLiteralA ) + { + // We'll malloc() data in initialize_variable + data_area = null_pointer_node; + } + else + { + // We need a unique name for the allocated data for this COBOL variable: + char achDataName[256]; + if( new_var->attr & external_e ) + { + sprintf(achDataName, "%s", new_var->name); + } + else if( new_var->name[0] == '_' ) + { + // Avoid doubling up on leading underscore + sprintf(achDataName, + "%s_data_%lu", + new_var->name, + sv_data_name_counter++); + } + else + { + sprintf(achDataName, + "_%s_data_%lu", + new_var->name, + sv_data_name_counter++); + } + + if( new_var->attr & external_e ) + { + tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate); + new_var->data_decl_node = gg_define_variable( + array_type, + achDataName, + vs_external); + data_area = gg_get_address_of(new_var->data_decl_node); + } + else + { + tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate); + new_var->data_decl_node = gg_define_variable( + array_type, + achDataName, + vs_static); + data_area = gg_get_address_of(new_var->data_decl_node); + } + } + } + } + } + + if( new_var->data.initial ) + { + new_initial = initial_from_float128(new_var, new_var->data.value); + } + if( new_initial ) + { + switch(new_var->type) + { + case FldGroup: + case FldAlphanumeric: + case FldLiteralA: + length_of_initial_string = new_var->data.capacity+1; + break; + + default: + length_of_initial_string = new_var->data.capacity; + break; + } + } + else + { + new_initial = new_var->data.initial; + if( !new_initial ) + { + if( length_of_initial_string ) + { + gcc_unreachable(); + } + } + else + { + if( new_var->type == FldLiteralN ) + { + // We need to convert this string to the internal character set + // char *buffer = NULL; + // size_t buffer_size = 0; + // raw_to_internal(&buffer, + // &buffer_size, + // new_var->data.initial, + // strlen(new_var->data.initial)); + // new_initial = bufer; + // length_of_initial_string = strlen(new_var->data.initial)+1; + } + } + } + + actual_allocate: + // if( level_88_string ) + // { + // actually_create_the_static_field( new_var, + // data_area, + // level_88_string_size, + // level_88_string, + // immediate_parent, + // new_var_decl); + // } + // else + { + actually_create_the_static_field( new_var, + data_area, + length_of_initial_string, + new_initial, + immediate_parent, + new_var_decl); + } + + if( level_88_string ) + { + free(level_88_string); + } + + if( !(new_var->attr & ( linkage_e | based_e)) ) + { + static const bool explicitly = false; + static const bool just_once = true; + initialize_variable_internal( new_var, + explicitly, + just_once); + } + + if( new_var->type != incoming_type ) + { + fprintf(stderr, "Type mismatch in parser_symbol_add()\n"); + gcc_unreachable(); + } + new_var->attr |= initialized_e; + } + else + { + fprintf(stderr, "parser_symbol_add() skipping %s", new_var->name); + } + done: + return; + } diff --git a/gcc/cobol/genapi.h b/gcc/cobol/genapi.h new file mode 100644 index 00000000000..2c135e8da62 --- /dev/null +++ b/gcc/cobol/genapi.h @@ -0,0 +1,587 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _GENAPI_H_ +#define _GENAPI_H_ + +#define DISPLAY_ADVANCE true +#define DISPLAY_NO_ADVANCE false + +typedef enum + { + refer_dest, + refer_source, + } refer_type_t; + +void parser_display_internal( tree file_descriptor, + cbl_refer_t refer, + bool advance=DISPLAY_NO_ADVANCE); + +void parser_first_statement( int lineno ); + +void parser_enter_file(const char *filename); +void parser_leave_file(); +void parser_division( cbl_division_t division, + cbl_field_t *ret, size_t narg, cbl_ffi_arg_t args[] ); +void parser_enter_program(const char *funcname, bool is_function, int *retval); +void parser_leave_program(); + +void parser_accept( cbl_refer_t refer, special_name_t special_e); +void parser_accept_exception( cbl_label_t *name ); +void parser_accept_exception_end( cbl_label_t *name ); + +void parser_accept_envar( cbl_refer_t refer, cbl_refer_t envar, + cbl_label_t *error, cbl_label_t *not_error ); +void parser_set_envar( cbl_refer_t envar, cbl_refer_t refer ); + +void parser_accept_command_line( cbl_refer_t tgt, cbl_refer_t src, + cbl_label_t *error, cbl_label_t *not_error ); +void parser_accept_command_line_count( cbl_refer_t tgt ); + +void parser_accept_date_yymmdd( cbl_field_t *tgt ); +void parser_accept_date_yyyymmdd( cbl_field_t *tgt ); +void parser_accept_date_yyddd( cbl_field_t *tgt ); +void parser_accept_date_yyyyddd( cbl_field_t *tgt ); +void parser_accept_date_dow( cbl_field_t *tgt ); +void parser_accept_date_hhmmssff( cbl_field_t *tgt ); + +void +parser_alphabet( cbl_alphabet_t& alphabet ); +void +parser_alphabet_use( cbl_alphabet_t& alphabet ); + +void +parser_allocate( cbl_refer_t size_or_based, cbl_refer_t returning, bool initialized ); +void +parser_free( size_t n, cbl_refer_t refers[] ); + +void +parser_add( size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + cbl_arith_format_t format, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error = NULL); // This has to be cast to a tree pointer to int + +void parser_arith_error( cbl_label_t *name ); +void parser_arith_error_end( cbl_label_t *name ); + +void +parser_subtract(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_arith_format_t format, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error = NULL); // This has to be cast to a tree pointer to int + +void +parser_multiply(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error = NULL); // This has to be cast to a tree pointer to int + +void +parser_divide(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_refer_t remainder, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error = NULL); // This has to be cast to a tree pointer to int + +void +parser_add( struct cbl_refer_t tgt, + struct cbl_refer_t a, struct cbl_refer_t b, + enum cbl_round_t = truncation_e ); + +void +parser_subtract( struct cbl_refer_t tgt, + struct cbl_refer_t a, struct cbl_refer_t b, + enum cbl_round_t = truncation_e ); + +void +parser_multiply( struct cbl_refer_t tgt, + struct cbl_refer_t a, struct cbl_refer_t b, + enum cbl_round_t = truncation_e ); + +void +parser_divide( struct cbl_refer_t quotient, + struct cbl_refer_t divisor, + struct cbl_refer_t dividend, + enum cbl_round_t = truncation_e, + struct cbl_refer_t remainder = cbl_refer_t()); + +// void +// parser_exponentiation( cbl_refer_t cref, +// cbl_refer_t aref, +// cbl_refer_t bref, +// cbl_round_t rounded = truncation_e ); + +void +parser_relop( struct cbl_field_t *tgt, + struct cbl_refer_t a, enum relop_t, struct cbl_refer_t b ); + +void +parser_relop_long(struct cbl_field_t *tgt, + long a, enum relop_t, struct cbl_refer_t b ); + +void +parser_logop( struct cbl_field_t *tgt, + struct cbl_field_t *a, enum logop_t, struct cbl_field_t *b ); + +void +parser_setop( struct cbl_field_t *tgt, + struct cbl_field_t *a, enum setop_t, struct cbl_field_t *b ); + +void +parser_bitop( struct cbl_field_t *tgt, + struct cbl_field_t *a, enum bitop_t, size_t B ); + +void +parser_bitwise_op(struct cbl_field_t *tgt, + struct cbl_field_t *a, + enum bitop_t op, + size_t bitmask ); + +void +parser_classify( struct cbl_field_t *tgt, + struct cbl_refer_t srca, enum classify_t type ); + +void +parser_op( struct cbl_refer_t cref, + struct cbl_refer_t aref, int op, struct cbl_refer_t bref, + struct cbl_label_t *op_error); + +cbl_field_t +determine_intermediate_type( const cbl_refer_t& aref, + int op, + const cbl_refer_t& bref ); + +void +parser_if( struct cbl_field_t *yn ); // value is 1 or 0 +void +parser_else(void); +void +parser_fi(void); + + +void +parser_enter_paragraph( struct cbl_label_t *label ); +void +parser_leave_paragraph( cbl_label_t *label ); + +void +parser_enter_section( struct cbl_label_t *label ); +void +parser_leave_section( struct cbl_label_t *label ); + +void +parser_perform( struct cbl_label_t *label, bool suppress_nexting=false ); + +void +parser_perform_times( struct cbl_label_t *label, cbl_refer_t count ); + +void +parser_perform_start( struct cbl_perform_tgt_t *tgt ); + +void +parser_perform_conditional( struct cbl_perform_tgt_t *tgt ); + +void +parser_perform_conditional_end( struct cbl_perform_tgt_t *tgt ); + +/* + * To perform once (not a loop) N is NULL because the user didn't provide a count. + * tgt->to is NULL if the PERFORM statement has no THRU phrase. + * For an in-line loop body, tgt->from.type == LblLoop, and tgt->to is NULL. + */ +void +parser_perform( struct cbl_perform_tgt_t *tgt, struct cbl_refer_t N ); + +/* + * A simple UNTIL loop uses 1 varys element. For VARY loops, the + * VARY/AFTER phrases appear in varys in the same order as in the + * COBOL text. + */ + +// Either parser_perform_until() or parser_perform_inline_times() must appear +// after a parser_perform_start() +void +parser_perform_until( struct cbl_perform_tgt_t *tgt, + bool test_before, + size_t nvary, + struct cbl_perform_vary_t *varys ); + +void +parser_perform_inline_times(struct cbl_perform_tgt_t *tgt, + struct cbl_refer_t count ); + +void +parser_see_stop_run( struct cbl_refer_t exit_status, const char name[] ); + +void +parser_program_hierarchy( const struct cbl_prog_hier_t& hier ); +void +parser_end_program(const char *name=NULL); + +void parser_sleep(cbl_refer_t seconds); + +void parser_exit( cbl_refer_t refer, ec_type_t = ec_none_e ); +void parser_exit_section(void); +void parser_exit_paragraph(void); +void parser_exit_perform( struct cbl_perform_tgt_t *tgt, bool cycle ); +void parser_exit_program(void); // exits back to COBOL only, else continue + +void +parser_display( const struct cbl_special_name_t *upon, + struct cbl_refer_t args[], size_t n, + bool advance = DISPLAY_ADVANCE ); + +void parser_display_field(cbl_field_t *fld); + +void parser_display_literal(const char *literal, + bool advance = DISPLAY_ADVANCE); + +void +parser_assign( size_t nC, cbl_num_result_t *C, + struct cbl_refer_t from, + cbl_label_t *on_error, + cbl_label_t *not_error, + cbl_label_t *compute_error ); + +void parser_move(struct cbl_refer_t to, + struct cbl_refer_t from, + cbl_round_t rounded=truncation_e, + bool skip_fill_from = false); + +void parser_move( size_t ntgt, cbl_refer_t *tgts, + cbl_refer_t src, cbl_round_t rounded=truncation_e ); + +void parser_initialize_table( size_t ntgt, cbl_refer_t src, + size_t nspan, const cbl_bytespan_t spans[], + size_t table, // symbol table index + size_t ntbl, const cbl_subtable_t tbls[] ); + +void parser_set_pointers( size_t ntgt, cbl_refer_t *tgts, cbl_refer_t src ); + +void +parser_symbol_add(struct cbl_field_t *field); + +void +parser_initialize(struct cbl_refer_t refer, bool like_parser_symbol_add=false); + +void +parser_initialize_programs(size_t nprog, struct cbl_refer_t *progs); + +void +parser_label_label( struct cbl_label_t *label ); + +void +parser_label_goto( struct cbl_label_t *label ); + +void +parser_goto( cbl_refer_t value, size_t narg, cbl_label_t * const labels[] ); + +void +parser_alter( cbl_perform_tgt_t *tgt ); + +void +parser_set_conditional88( struct cbl_refer_t tgt, bool which_way ); +void +parser_set_numeric(struct cbl_field_t *tgt, ssize_t value); + +void +parser_field_attr_set( cbl_field_t *tgt, cbl_field_attr_t attr, bool on_off = true ); + +void +parser_file_add(struct cbl_file_t *file); + +void +parser_file_open( struct cbl_file_t *file, int mode_char ); +void +parser_file_open( size_t n, struct cbl_file_t *files[], int mode_char ); + +void +parser_file_close( struct cbl_file_t *file, file_close_how_t how = file_close_no_how_e); + +void +parser_file_read( struct cbl_file_t *file, + struct cbl_refer_t buffer, + int where ); + +void +parser_file_start( struct cbl_file_t *file, relop_t op, int flk, + cbl_refer_t = cbl_refer_t() ); + +/* + * Write *field* to *file*. *after* is a bool where false + * means BEFORE. *nlines* is the number of lines, frequently + * FldLiteralN. To indicate PAGE, nlines is the literal "PAGE", with + * quoted_e off. + * + * According to the 2014 standard, the lack of an ADVANCING clause implies + * AFTER ADVANCING 1 LINE. *nlines* is be zero to write a line without + * prepending or appending newlines. See section 14.9.47.1 paragraph 22) + * + * At present, we don't have enough information to implement PAGE + * correctly, because we don't know the page size (in lines) of the + * output device. Rather than doing nothing, we issue a 0x0C form feed + * character. + */ +void +parser_file_write( cbl_file_t *file, + cbl_field_t *source, + bool after, + cbl_refer_t& nlines, + bool sequentially); + +void +parser_file_rewrite( cbl_file_t *file, cbl_field_t *field, + bool sequentially ); + +void +parser_file_delete( cbl_file_t *file, bool sequentially ); + +#if condition_lists +struct cbl_conditional_t { + cbl_field_t *tgt; + cbl_refer_t& lhs; + unsigned int op; + cbl_refer_t& rhs; +}; +#endif + +void +parser_lsearch_start( cbl_label_t *name, + cbl_field_t *table, + cbl_field_t *index, + cbl_field_t *varying ); + +void parser_lsearch_conditional(cbl_label_t * name); +void parser_bsearch_conditional(cbl_label_t * name); + +void parser_lsearch_when( cbl_label_t *name, cbl_field_t *conditional ); +void +parser_bsearch_when(cbl_label_t *name, + cbl_refer_t key, + cbl_refer_t sarg, + bool ascending); + +void parser_lsearch_end( cbl_label_t *name ); +void parser_bsearch_end( cbl_label_t *name ); + +void +parser_bsearch_start( cbl_label_t *name, cbl_field_t *tgt ); + +void +parser_sort(cbl_refer_t table, + bool duplicates, + cbl_alphabet_t *alphabet, + size_t nkey, + cbl_key_t *keys ); +void +parser_file_sort( cbl_file_t *file, + bool duplicates, + cbl_alphabet_t *alphabet, + size_t nkey, + cbl_key_t *keys, + size_t ninput, + cbl_file_t **inputs, + size_t noutput, + cbl_file_t **outputs, + cbl_perform_tgt_t *in_proc, + cbl_perform_tgt_t *out_proc ); +void +parser_file_merge( cbl_file_t *file, + cbl_alphabet_t *alphabet, + size_t nkey, + cbl_key_t *keys, + size_t ninput, + cbl_file_t **inputs, + size_t noutput, + cbl_file_t **outputs, + cbl_perform_tgt_t *out_proc ); + +void +parser_release( cbl_field_t *record_area ); + +void +parser_exception_file( cbl_field_t *tgt, cbl_file_t* file = NULL ); + +void +parser_module_name( cbl_field_t *tgt, module_type_t type ); + +void +parser_intrinsic_numval_c( cbl_field_t *f, + cbl_refer_t& input, + bool locale, + cbl_refer_t& currency, + bool anycases, + bool test_numval_c = false); + +void +parser_intrinsic_subst( cbl_field_t *f, + cbl_refer_t& ref1, + size_t argc, + cbl_substitute_t * argv ); + +void +parser_intrinsic_callv( cbl_field_t *f, + const char name[], + size_t argc, + cbl_refer_t * argv ); + +void +parser_intrinsic_call_0( cbl_field_t *tgt, + const char name[] ); +void +parser_intrinsic_call_1( cbl_field_t *tgt, + const char name[], + cbl_refer_t& ref1 ); +void +parser_intrinsic_call_2( cbl_field_t *tgt, + const char name[], + cbl_refer_t& ref1, + cbl_refer_t& ref2 ); +void +parser_intrinsic_call_3( cbl_field_t *tgt, + const char name[], + cbl_refer_t& ref1, + cbl_refer_t& ref2, + cbl_refer_t& ref3 ); +void +parser_intrinsic_call_4( cbl_field_t *tgt, + const char name[], + cbl_refer_t& ref1, + cbl_refer_t& ref2, + cbl_refer_t& ref3, + cbl_refer_t& ref4 ); + +void +parser_string_overflow( cbl_label_t *name ); +void +parser_string_overflow_end( cbl_label_t *name ); + +void +parser_string( cbl_refer_t tgt, + cbl_refer_t pointer, + size_t nsource, + cbl_string_src_t *sources, + cbl_label_t *overflow, + cbl_label_t *not_overflow ); + +void +parser_unstring( cbl_refer_t src, + size_t ndelimited, + cbl_refer_t *delimiteds, + // into + size_t noutput, + cbl_refer_t *outputs, + cbl_refer_t *delimiters, + cbl_refer_t *counts, + cbl_refer_t pointer, + cbl_refer_t tally, + cbl_label_t *overflow, + cbl_label_t *not_overflow ); + +void parser_return_start( cbl_file_t *file, cbl_refer_t into ); +void parser_return_atend( cbl_file_t *file ); +void parser_return_notatend( cbl_file_t *file ); +void parser_return_finish( cbl_file_t *file ); + +void parser_exception_prepare( const cbl_name_t statement_name, + const cbl_enabled_exceptions_array_t *enabled ); + +//void parser_exception_condition( cbl_field_t *ec ); + +struct cbl_exception_file; +struct cbl_exception_files_t; + +void parser_exception_raise(ec_type_t ec); + +void parser_call_exception( cbl_label_t *name ); +void parser_call_exception_end( cbl_label_t *name ); + +//void parser_stash_exceptions(const cbl_enabled_exceptions_array_t *enabled); + +void parser_match_exception(cbl_field_t *index, + cbl_field_t *blob); +void parser_check_fatal_exception(); +void parser_clear_exception(); + +void parser_call_targets_dump(); +size_t parser_call_target_update( size_t caller, + const char extant[], + const char mangled_tgt[] ); + +void parser_file_stash( struct cbl_file_t *file ); + +void parser_call( cbl_refer_t name, + cbl_refer_t returning, + size_t narg, cbl_ffi_arg_t args[], + cbl_label_t *except, + cbl_label_t *not_except, + bool is_function); + +void parser_entry_activate( size_t iprog, const cbl_label_t *declarative ); + +void parser_entry( cbl_field_t *name, + size_t narg = 0, cbl_ffi_arg_t args[] = NULL); + +bool is_ascending_key(cbl_refer_t key); + +void register_main_switch(const char *main_string); + +tree parser_cast_long(tree N); +void parser_print_long(tree N); +void parser_print_long(const char *fmt, tree N); +void parser_print_long(long N); +void parser_print_long(const char *fmt, long N); // fmt needs to have a %ls in it +void parser_print_string(const char *ach); +void parser_print_string(const char *fmt, const char *ach); // fmt needs to have a %s in it +void parser_set_statement(const char *statement); + +char *initial_from_float128(cbl_field_t *field, _Float128 value); + +void parser_set_handled(ec_type_t ec_handled); +void parser_set_file_number(int file_number); +void parser_exception_clear(); + +void parser_init_list_size(int count_of_variables); +void parser_init_list_element(cbl_field_t *field); +void parser_init_list(); + +tree file_static_variable(tree type, const char *name); + +void parser_statement_begin(); + +#endif diff --git a/gcc/cobol/gengen.cc b/gcc/cobol/gengen.cc new file mode 100644 index 00000000000..c39af0b45d8 --- /dev/null +++ b/gcc/cobol/gengen.cc @@ -0,0 +1,3462 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* The compilation process consists of + + 1) lexing + 2) parsing + 3) generation of the GENERIC abstract syntax tree + 4) reduction + 5) generation of machine code + + For your sins, you have wandered into the code that accepts information from + the parser about what the COBOL source code wants done. + + Specifically, the routines in this module, which run at compile time, generate + the GENERIC tags that describe the equivalent of the COBOL. They are rathernnn + low level routines, ultimately used for pretty much everything. Specifically, + they run at compile-time, and they generate the GENERIC tags that control what + ultimately happens at run-time. + + It *is* confusing. + + I'll try to collect things in a logical way, and name them in a logical way, + and I'll try to comment them well enough so that you have some hope of + understanding what the heck is going on. + + There is some information in the GCC internals document, but it was written by + people who live and breathe this stuff, and they don't remember what it was like + to know nothing. + + I suspect that those who have tried and failed to create GCC front ends have foundered because + they just couldn't figure out what it was they needed to do. I certainly floundered + for several days before I hit on the means to figure it out. I created the + rjd_print_tree() routine, which spits out a text listing of all the nodes + connected to the specified starting node. (Keep in mind that the GENERIC graph + is cyclic, and consequently there is no real ordering, except that the starting + node you specify is NodeNumber0. rjd_print_tree follows all links, but it prints + out each unique node exactly once.) + + I then built into GCC a call to rjd_print_tree right at the point where the GENERIC tree + is complete and about to be reduced. + + And that gave me the ability to create simple C programs and see the resulting GENERIC + tree. It took a while to sort out what I was seeing, but ultimately things started + to make sense. The inherent difficulty may start to become clear when you realize that + the program + + void foo() + { + } + + is implemented by a GENERIC tree with fifty-six nodes. + + I can't try to write a whole manual here. But hopefully there will be enough examples + throughout the code for you to learn how to do things on a highish level, and you can + look at the low -level routines to see how it is accomplished. + + That said, I will try to comment things well enough to be meaningful at least to me + when I run across them at some time in the future. Because I fear that whatever + I do here, the world will little note, and *I* will not long remember, what it was! + */ + +#include "cobol-system.h" +#include "coretypes.h" +#include "tree.h" +#include "tree-iterator.h" +#include "stringpool.h" +#include "cgraph.h" +#include "toplev.h" +#include "function.h" +#include "fold-const.h" +#define HOWEVER_GCC_DEFINES_TREE 1 +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "gengen.h" + +// We are limiting the programmer to functions with 512 or fewer arguments. +// Don't like it? Cry me a river. +static const int ARG_LIMIT = 512; + +static int sv_current_line_number; + +// These are globally useful constants +tree char_nodes[256]; + +tree pvoid_type_node; +tree integer_minusone_node; +tree integer_two_node; +tree integer_eight_node; +tree size_t_zero_node; +tree int128_zero_node; +tree int128_five_node; +tree int128_ten_node; +tree char_ptr_type_node; +tree uchar_ptr_type_node; +tree wchar_ptr_type_node; +tree long_double_ten_node; +tree sizeof_size_t; +tree sizeof_pointer; + +tree bool_true_node; +tree bool_false_node; + +// This is the global translation unit structure; it contains everything needed +// to compile one file that you might otherwise be tempted to instantiate as +// global variables: + +struct cbl_translation_unit_t gg_trans_unit; + +void +gg_build_translation_unit(const char *filename) + { + // The translation_unit_decl gets declared once for each processing source + // input file. It serves as an anchor for each function. And the + // block referred to by its "initial" member is the anchor for any + // variables whose scope is file. + + gg_trans_unit.trans_unit_decl + = build_translation_unit_decl(get_identifier(filename)); + + gg_trans_unit.filename = filename; + + tree tree_block = make_node(BLOCK); + BLOCK_SUPERCONTEXT(tree_block) + = gg_trans_unit.trans_unit_decl; + TREE_USED(tree_block) = 1; + DECL_INITIAL(gg_trans_unit.trans_unit_decl) = tree_block; + } + +// Explanation of context. There is a plate of spaghetti that represents +// a chain of contexts. + +// The deconstructed dinner: The function_decl "initial" points to a block +// The block points to the first of a chained set of var_decl, one for each +// variable in the block. The function "saved_tree" entry points to a +// bind_expr. The bind_expr vars member points to the same chain of var_decl. +// The bind_expr block member points to the block. And the bind_expr body +// member points to the statement_list for the context. + +// Those four tags constitute the context. To push the context, a new block +// is chained to the first blocks SUBCHAIN member. A new bind_expr is created +// and put on the statement_list of the enclosing block. And a new list of +// var_decls is set up for the new block and the new bind_expr. + +// And that's how subcontexts are made. + +static void +gg_chain_onto_block_vars(tree block, tree var) + { + // In order to use a variable in a context, the var_decl has to go + // onto the chain that starts with the "vars" entry of a block + + // Upon discovering that chainon has O(N-squared) complexity because it walks + // the entire chain looking for the final member, Dubner put in this map. + static std::unordered_mapblocks; + if( !BLOCK_VARS(block) ) + { + // This is the first variable: + BLOCK_VARS(block) = var; + blocks[block] = var; + } + else + { + //chainon(BLOCK_VARS(block), var); + // What follows is the quicker equivalent of calling chainon() + TREE_CHAIN(blocks[block]) = var; + blocks[block] = var; + } + } + +void +gg_append_var_decl(tree var_decl) + { + // The var_decl has to be chained onto the appropriate block. + + if( SCOPE_FILE_SCOPE_P(DECL_CONTEXT(var_decl)) ) + { + tree context = gg_trans_unit.trans_unit_decl; + tree block = DECL_INITIAL(context); + + gg_chain_onto_block_vars(block, var_decl); + + rest_of_decl_compilation (var_decl, true, false); + + // With global variables, it is probably necessary to do something with + // wrapup_global_declarations. At this writing, I have not yet + // investigated that. The advice from gcc@gcc.gnu.org came from + // David Malcolm: + /* + You might find libgccjit's gcc/jit/jit-playback.cc helpful for this, as + it tends to contain minimal code to build trees (generally + simplified/reverse-engineered from the C frontend). + + playback::context::global_new_decl makes the VAR_DECL node, and such + trees are added to the jit playback::context's m_globals. + In playback::context::replay, we have: + + / * Finalize globals. See how FORTRAN 95 does it in gfc_be_parse_file() + for a simple reference. * / + FOR_EACH_VEC_ELT (m_globals, i, global) + rest_of_decl_compilation (global, true, true); + + wrapup_global_declarations (m_globals.address(), m_globals.length()); + */ + + // Stash this var_decl in a map so it can be found elsewhere: + //fprintf(stderr, "Stashing %s\n", IDENTIFIER_POINTER(DECL_NAME(var_decl))); + gg_trans_unit.trans_unit_var_decls + [IDENTIFIER_POINTER(DECL_NAME(var_decl))] = var_decl; + } + else + { + // For function-level variables, we use a stack of blocks to keep track + // of which block is active for the current context: + + // fprintf(stderr, "%s(): %30s Function Scope\n", __func__, id_name); + tree bind_expr = current_function->bind_expr_stack.back(); + tree block = BIND_EXPR_BLOCK(bind_expr); + + gg_chain_onto_block_vars(block, var_decl); + + // If saved_tree.bind_expr.vars is null, then var_decl is the very + // first variable in the block, and it must be set in bind_expr as well + if( !BIND_EXPR_VARS(bind_expr) ) + { + BIND_EXPR_VARS(bind_expr) = var_decl; + } + } + } + +location_t +location_from_lineno() + { + location_t loc; + loc = linemap_line_start(line_table, sv_current_line_number, 0); + return loc; + } + +void +gg_append_statement(tree stmt) + { + // Likewise, we have a stack of statement_lists, with the current one + // at the back. (The statement_list stack can get deeper than the block + // stack, because you can create a separate statement list for the insides + // of, say, a WHILE statement without creating a whole context for it) + + // This statement list thing looks innocent enough, but it is the general + // way of actually having a GENERIC tree generate executing code. What goes + // onto a statement list is an expression. A = B is implemented with a + // modify_expr + + // Actually instantiating a variable requires a var_expr + + // A subroutine call is effected by putting a call_expr onto the statement + // list. + + // It's not the only way; you can have a modify_expr that takes a var_decl + // as a destination, and uses a call_expr as a source. This requires that + // the type of the var_decl be the same as the type of the function being + // called. + + // And so on. Just keep in mind that you have types, and declarations, and + // expressions, among other things. + + // When trying to figure out location_t, take a look at + // ./libcpp/include/line-map.h + // ./libcpp/location-example.txt + + gcc_assert( gg_trans_unit.function_stack.size() ); + + TREE_SIDE_EFFECTS(stmt) = 1; // If an expression has no side effects, + // // it won't generate code. + TREE_SIDE_EFFECTS(current_function->statement_list_stack.back()) = 1; + append_to_statement_list( stmt, &(current_function->statement_list_stack.back()) ); + } + +tree +gg_float(tree floating_type, tree integer_var) + { + // I don't know why, but this fails if 'var' is an INT128 + return build1(FLOAT_EXPR, floating_type, integer_var); + } + +tree +gg_trunc(tree integer_type, tree floating_var) + { + /* Conversion of real to fixed point by truncation. */ + return build1(FIX_TRUNC_EXPR, integer_type, floating_var); + } + +tree +gg_cast(tree type, tree var) + { + return fold_convert(type, var); + } + +static bool saw_pointer; + +static +tree +adjust_for_type(tree type) + { + tree retval; + + switch( TREE_CODE(type) ) + { + case POINTER_TYPE: + saw_pointer = true; + retval = adjust_for_type(TREE_TYPE(type)); + break; + + case COMPONENT_REF: + case ADDR_EXPR: + case ARRAY_TYPE: + case VAR_DECL: + case FUNCTION_TYPE: + retval = adjust_for_type(TREE_TYPE(type)); + break; + case RECORD_TYPE: + default: + retval = type; + break; + } + + return retval; + } + +static +char * +show_type(tree type) + { + if( !type ) + { + cbl_internal_error("The given type is not NULL, and that's just not fair"); + } + + if( DECL_P(type) ) + { + type = TREE_TYPE(type); + } + if( !TYPE_P(type) ) + { + cbl_internal_error("The given type is not a DECL or a TYPE"); + } + + static char ach[1024]; + switch( TREE_CODE(type) ) + { + case VOID_TYPE: + sprintf(ach, "VOID"); + break; + + case BOOLEAN_TYPE: + sprintf(ach, "BOOL"); + break; + + case RECORD_TYPE: + sprintf(ach, "RECORD"); + break; + + case REAL_TYPE: + sprintf(ach, + "%3ld-bit REAL", + TREE_INT_CST_LOW(TYPE_SIZE(type))); + break; + + case INTEGER_TYPE: + sprintf(ach, + "%3ld-bit %s INT", + TREE_INT_CST_LOW(TYPE_SIZE(type)), + (TYPE_UNSIGNED(type) ? "unsigned" : " signed")); + break; + + case FUNCTION_TYPE: + sprintf(ach, "FUNCTION"); +// sprintf(ach, +// "%3ld-bit %s INT", +// TREE_INT_CST_LOW(TYPE_SIZE(type)), +// (TYPE_UNSIGNED(type) ? "unsigned" : " signed")); + break; + + default: + cbl_internal_error("Unknown type %d", TREE_CODE(type)); + } + + return ach; + } + +void +gg_assign(tree dest, const tree source) + { + // This does the equivalent of a C/C++ "dest = source". When X1 is set, it + // does some checking for conditions that can result in inefficient code, so + // that is useful during development when even an astute programmer might + // need an assist with keeping variable types straight. + + // This routine also provides for the possibility that the assignment is + // for a source that is a function invocation, as in + // "dest = function_call()" + + saw_pointer = false; + tree dest_type = adjust_for_type(TREE_TYPE(dest)); + saw_pointer = false; + tree source_type = adjust_for_type(TREE_TYPE(source)); + bool p2 = saw_pointer; + + bool okay = dest_type == source_type; + + if( !okay ) + { + if( TREE_CODE(dest_type) == INTEGER_TYPE + && TREE_CODE(source_type) == INTEGER_TYPE + && TREE_INT_CST_LOW(TYPE_SIZE(dest_type)) == TREE_INT_CST_LOW(TYPE_SIZE(source_type)) + && TYPE_UNSIGNED(dest_type) == TYPE_UNSIGNED(source_type) ) + { + okay = true; + } + } + + if( okay ) + { + tree stmt = build2_loc( location_from_lineno(), + MODIFY_EXPR, + TREE_TYPE(dest), + dest, + source); + gg_append_statement(stmt); + } + else + { + // We are doing an assignment where the left- and right-hand types are not + // the same. This is a compilation-time error, since we want the caller to + // have sorted the types out explicitly. If we don't throw an error here, + // the gimple reduction will do so. Better to do it here, when we know + // where we are. + dbgmsg("Inefficient assignment"); + if(DECL_P(dest) && DECL_NAME(dest)) + { + dbgmsg(" Destination is %s", IDENTIFIER_POINTER(DECL_NAME(dest))); + } + dbgmsg(" dest type is %s%s", show_type(dest_type), p2 ? "_P" : ""); + if(DECL_P(source) && DECL_NAME(source)) + { + dbgmsg(" Source is %s", IDENTIFIER_POINTER(DECL_NAME(source))); + } + dbgmsg(" source type is %s%s", show_type(source_type), p2 ? "_P" : ""); + gcc_unreachable(); + } + } + +tree +gg_find_field_in_struct(const tree base, const char *field_name) + { + // Finds and returns the field_decl for the named member. 'base' can be + // a structure or a pointer to a structure. + tree type = TREE_TYPE(base); + tree rectype; + if( POINTER_TYPE_P (type) ) + { + tree pointer_type = TREE_TYPE(base); + rectype = TREE_TYPE(pointer_type); + } + else + { + // Assuming a struct (or union), pick up the record_type + rectype = TREE_TYPE(base); + } + + tree id_of_field = get_identifier(field_name); + + tree field_decl = NULL_TREE; + + tree next_value = TYPE_FIELDS(rectype); + + // Look through the chain of fields for a match to ours. This is, in the + // limit, an O(N^2) computational burden. But structures usually small, so we + // probably don't have to figure out how to make it faster. + while( next_value ) + { + if( DECL_NAME(next_value) == id_of_field ) + { + field_decl = next_value; + break; + } + next_value = TREE_CHAIN(next_value); + } + + if( !field_decl ) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### Somebody asked for the field %s.%s, which doesn't exist", + IDENTIFIER_POINTER(DECL_NAME(base)), + field_name); + gcc_unreachable(); + } + + return field_decl; + } + +static tree +gg_start_building_a_union(const char *type_name, tree type_context) + { + // type_context is current_function->function_decl for union local + // to a function. + + // It is translation_unit_decl for unions common to all functions + + // We want to return the type_decl for an empty union + + // First, create the record_type whose values will eventually + // be the chain of of the struct's fields: + + tree uniontype = make_node(UNION_TYPE); + TYPE_CONTEXT(uniontype) = type_context; + TYPE_SIZE_UNIT(uniontype) = integer_zero_node; + TYPE_SIZE(uniontype) = integer_zero_node; + TYPE_NAME(uniontype) = get_identifier(type_name); + + TYPE_MODE_RAW(uniontype) = TYPE_MODE (intTI_type_node); + + // We need a type_decl for the record_type: + tree typedecl = make_node(TYPE_DECL); + + // The type of the type_decl is the record_type: + TREE_TYPE(typedecl) = uniontype; + + SET_TYPE_ALIGN(uniontype, 16); + + // The chain element of the record_type points back to the type_decl: + TREE_CHAIN(uniontype) = typedecl; + + return typedecl; + } + +static tree +gg_start_building_a_struct(const char *type_name, tree type_context) + { + // type_context is current_function->function_decl for structures local + // to a function. + + // It is translation_unit_decl for structures common to all functions + + // We want to return the type_decl for an empty struct + + // First, create the record_type whose values will eventually + // be the chain of of the struct's fields: + + tree recordtype = make_node(RECORD_TYPE); + TYPE_CONTEXT(recordtype) = type_context; + TYPE_SIZE_UNIT(recordtype) = integer_zero_node; + TYPE_SIZE(recordtype) = integer_zero_node; + TYPE_NAME(recordtype) = get_identifier(type_name); + + TYPE_MODE_RAW(recordtype) = BLKmode; + + // We need a type_decl for the record_type: + tree typedecl = make_node(TYPE_DECL); + + // The type of the type_decl is the record_type: + TREE_TYPE(typedecl) = recordtype; + + SET_TYPE_ALIGN(recordtype, 8); + + // The chain element of the record_type points back to the type_decl: + TREE_CHAIN(recordtype) = typedecl; + + return typedecl; + } + +static void +gg_add_field_to_structure(const tree type_of_field, const char *name_of_field, tree struct_type_decl) + { + // We're given the struct_type_decl. + // Append the new field to that type_decl's record_type's chain: + tree struct_record_type = TREE_TYPE(struct_type_decl); + + bool is_union = TREE_CODE((struct_record_type)) == UNION_TYPE; + + tree id_of_field = get_identifier (name_of_field); + + // Create the new field: + tree new_field_decl = build_decl( location_from_lineno(), + FIELD_DECL, + id_of_field, + type_of_field); + + // Establish the machine mode for the field_decl: + SET_DECL_MODE(new_field_decl, TYPE_MODE(type_of_field)); + + // Establish the context of the new field as being the record_type + DECL_CONTEXT (new_field_decl) = struct_record_type; + + // Establish the size of the new field as being the same as its prototype: + DECL_SIZE(new_field_decl) = TYPE_SIZE(type_of_field); // This is in bits + DECL_SIZE_UNIT(new_field_decl) = TYPE_SIZE_UNIT(type_of_field); // This is in bytes + + // We need to establish the offset and bit offset of the new node. + // Empirically, this seems to be done on 16-bit boundaries, with DECL_FIELD_OFFSET + // in units of N*16 bytes, and FIELD_BIT_OFFSET being offsets in bits from the DECL_FIELD_OFFSET + + // We calculate our desired offset in bits: + + // Pick up the current size, in bytes, of the record_type: + long offset_in_bytes = TREE_INT_CST_LOW(TYPE_SIZE_UNIT(struct_record_type)); + + static const int MAGIC_NUMBER_SIXTEEN = 16 ; + static const int BITS_IN_A_BYTE = 8 ; + + // We know the offset_in_bytes, which is the size, of the structure with + // its current members. + + //long type_size = TREE_INT_CST_LOW(TYPE_SIZE_UNIT(type_of_field)); + long type_align_in_bits = TYPE_ALIGN(type_of_field); + long type_align_in_bytes = type_align_in_bits/BITS_IN_A_BYTE; + + // As per the Amd64 ABI, we need to set the structure's type alignment to be + // that of most strictly aligned component: + // This is the current restriction: + long struct_align_in_bits = TYPE_ALIGN(TREE_TYPE(struct_type_decl)); + if( type_align_in_bits > struct_align_in_bits ) + { + // The new one is the new champion + SET_TYPE_ALIGN(TREE_TYPE(struct_type_decl), type_align_in_bits ); + } + + // We know struct_type_decl is a record_type, so we can sneak through this comparison + if( type_of_field == TREE_TYPE(struct_type_decl) ) + { + printf(" It is a record_type\n"); + } + + // Bump up the offset until we are aligned: + while( offset_in_bytes % type_align_in_bytes) + { + offset_in_bytes += 1; + } + + if( is_union ) + { + // Turn that into the bytes/bits offsets of the new field: + DECL_FIELD_OFFSET(new_field_decl) = build_int_cst_type (SIZE_T, 0); + DECL_FIELD_BIT_OFFSET(new_field_decl) = build_int_cst_type (bitsizetype, 0); + + // The size of a union is the size of its largest member: + offset_in_bytes = std::max(offset_in_bytes, (long)TREE_INT_CST_LOW(DECL_SIZE_UNIT(new_field_decl))); + } + else + { + // Turn that into the bytes/bits offsets of the new field: + long field_offset = (offset_in_bytes/MAGIC_NUMBER_SIXTEEN)*MAGIC_NUMBER_SIXTEEN; + long field_bit_offset = (offset_in_bytes - field_offset) * BITS_IN_A_BYTE; + DECL_FIELD_OFFSET(new_field_decl) = build_int_cst_type (SIZE_T, field_offset);; + DECL_FIELD_BIT_OFFSET(new_field_decl) = build_int_cst_type (bitsizetype, field_bit_offset); + + // This was done empirically to make our generated code match that of a C program + SET_DECL_OFFSET_ALIGN(new_field_decl, 128); + + // And now we need to update the size of the record type: + offset_in_bytes += TREE_INT_CST_LOW(DECL_SIZE_UNIT(new_field_decl)); + } + + TYPE_SIZE_UNIT(struct_record_type) = build_int_cst_type (SIZE_T, offset_in_bytes); // In bytes + TYPE_SIZE(struct_record_type) = build_int_cst_type (bitsizetype, offset_in_bytes*BITS_IN_A_BYTE); // In bits + + if( !TYPE_FIELDS(struct_record_type) ) + { + // This is the first variable of the chain: + TYPE_FIELDS(struct_record_type) = new_field_decl; + } + else + { + // We need to tack the new one onto an already existing chain: + chainon(TYPE_FIELDS(struct_record_type), new_field_decl); + } + } + +void +gg_get_struct_type_decl(tree struct_type_decl, int count, va_list params) + { + while( count-- ) + { + tree field_type = va_arg(params, tree); + const char *name = va_arg(params, const char *); + gg_add_field_to_structure(field_type, name, struct_type_decl); + } + // Note: On 2022-02-18 I removed the call to gg_append_var_decl, which + // chains the type_decl on the function block. I don't remember why I + // thought it was necessary. It makes no difference for COBOL compilations. + // + // But I must have copied it from a C compilation example. + // + // I removed it so that I could create type_decls outside of a function. + // I know not what the long-term implications might be. + // + // You have been served notice. + // + // struct_type_decl is the type_decl for our structure. We need to + // append it to the list of variables in order to use it: + // The following function call is misnamed. It can take struct type_decls + //gg_append_var_decl(struct_type_decl); + } + +void +gg_get_union_type_decl(tree union_type_decl, int count, va_list params) + { + while( count-- ) + { + tree field_type = va_arg(params, tree); + const char *name = va_arg(params, const char *); + gg_add_field_to_structure(field_type, name, union_type_decl); + } + } + +tree +gg_get_local_struct_type_decl(const char *type_name, int count, ...) + { + tree struct_type_decl = gg_start_building_a_struct(type_name, current_function->function_decl); + + va_list params; + va_start(params, count); + + gg_get_struct_type_decl(struct_type_decl, count, params); + + va_end(params); + + // To use the struct_type_decl, you'll need to execute + // the following to turn it into a var_decl: + // tree var_decl = gg_define_variable( TREE_TYPE(struct_type_decl), + // var_name, + // vs_static); + return struct_type_decl; + } + +tree +gg_get_filelevel_struct_type_decl(const char *type_name, int count, ...) + { + tree struct_type_decl = gg_start_building_a_struct(type_name, gg_trans_unit.trans_unit_decl); + + va_list params; + va_start(params, count); + + gg_get_struct_type_decl(struct_type_decl, count, params); + + va_end(params); + + // To use the struct_type_decl, you'll need to execute + // the following to turn it into a var_decl: + // tree var_decl = gg_define_variable( TREE_TYPE(struct_type_decl), + // var_name, + // vs_static); + return struct_type_decl; + } + +tree +gg_get_filelevel_union_type_decl(const char *type_name, int count, ...) + { + tree struct_type_decl = gg_start_building_a_union(type_name, gg_trans_unit.trans_unit_decl); + + va_list params; + va_start(params, count); + + gg_get_union_type_decl(struct_type_decl, count, params); + + va_end(params); + + // To use the struct_type_decl, you'll need to execute + // the following to turn it into a var_decl: + // tree var_decl = gg_define_variable( TREE_TYPE(struct_type_decl), + // var_name, + // vs_static); + return struct_type_decl; + } + +tree +gg_define_local_struct(const char *type_name, const char * var_name, int count, ...) + { + // Builds a structure, declares it as a static variable in the current function, + // and returns the var_decl for it. + tree struct_type_decl = gg_start_building_a_struct(type_name, current_function->function_decl); + + va_list params; + va_start(params, count); + + gg_get_struct_type_decl(struct_type_decl, count, params); + + va_end(params); + // We now have a complete struct_type_decl, whose TREE_TYPE is the + // the type we need when declaring it. + + // And with that done, we can actually define the storage: + tree var_decl = gg_define_variable( TREE_TYPE(struct_type_decl), + var_name, + vs_static); + return var_decl; + } + +tree +gg_struct_field_ref(const tree base, const char *field) + { + tree retval; + + tree type = TREE_TYPE(base); + if( POINTER_TYPE_P (type) ) + { + tree pointer_type = TREE_TYPE(base); + tree base_pointer_type = TREE_TYPE(pointer_type); + // We need a COMPONENT_REF which is an INDIRECT_REF to a FIELD_DECL + tree field_decl = gg_find_field_in_struct(base, field); + tree indirect_ref = build1(INDIRECT_REF, base_pointer_type, base); + retval = build3(COMPONENT_REF, + TREE_TYPE(field_decl), + indirect_ref, + field_decl, + NULL_TREE); + } + else + { + // It's not a pointer, so presumably it's a structure + tree field_decl = gg_find_field_in_struct(base, field); + retval = build3(COMPONENT_REF, + TREE_TYPE(field_decl), + base, + field_decl, + NULL_TREE); + } + return retval; + } + +tree +gg_assign_to_structure(tree var_decl_struct, const char *field, const tree source) + { + // The C equivalent: "struct.field = source" + tree component_ref = gg_struct_field_ref(var_decl_struct,field); + gg_assign(component_ref,source); + return component_ref; + } + +tree +gg_assign_to_structure(tree var_decl_struct, const char *field, int N) + { + // The C equivalent: "struct.field = N" + tree component_ref = gg_struct_field_ref(var_decl_struct,field); + gg_assign(component_ref,build_int_cst(integer_type_node, N)); + return component_ref; + } + +static tree +gg_create_assembler_name(const char *cobol_name) + { + char *psz = cobol_name_mangler(cobol_name); + tree retval = get_identifier(psz); + free(psz); + return retval; + } + +static char * +gg_unique_in_function(const char *var_name, gg_variable_scope_t vs_scope) + { + char *retval = (char *)xmalloc(strlen(var_name)+32); + if( (vs_scope == vs_stack || vs_scope == vs_static) ) + { + sprintf(retval, "%s.%ld", var_name, current_function->program_id_number); + } + else + { + strcpy(retval, var_name); + } + return retval; + } + +tree +gg_declare_variable(tree type_decl, + const char *name, + tree initial_value, + gg_variable_scope_t vs_scope, + bool *already_defined) + { + // The C/C++ language provides the concept of a *declaration*, which is a + // prototype for a variable or function. "extern int global_var" is a + // declaration. Declarations let the compiler know what kind of variable it + // is looking for so that it can know what to do with it when it is found. + // + // A *definition* causes the assembler to actually create data storage for + // the specified var_decl. + // + // Be it hereby known that the various attributes associated with a var_decl, + // things like TREE_PUBLIC and TREE_STATIC and TREE_CONST seem to line up with + // their meanings in the C language. But I haven't investigated it enough to + // be completely sure about that. A hard look at gcc/tree.h is on my list of + // homework assignments. In the meantime, I continue to learn by compiling + // C programs with the fdump-generic-nodes option, and copying them as + // necessary to accomplish specific tasks. + // + // Specifically, this routine creates and returns a VAR_DECL, which is the + // prototype. + // + // The gg_define_variable() routines take a VAR_DECL and create a DECL_EXPR + // node from it. When that DECL_EXPR is appended to the statement list, it + // causes the storage to be allocated. + + // It is routine to let the compiler assign names to stack variables. The + // assembly code doesn't use names for variables on the stack; they are + // referenced by offsets to the base pointer. But static variables have to + // have names, and there are places in my code generation -- Lord only knows + // why -- where I didn't give the variables explicit names. We remedy that + // here: + + static std::mapseen; + + tree var_name = NULL_TREE; + tree var_decl; + // Assume that for an external reference we know what we want: + char *unique_name = NULL; + if( name ) + { + // We were provided a name + unique_name = gg_unique_in_function(name, vs_scope); + var_name = get_identifier(unique_name); + std::map::const_iterator it = seen.find(unique_name); + if( it != seen.end() ) + { + // We've seen this one before + var_decl = it->second; + if( already_defined ) + { + *already_defined = true; + } + } + else + { + var_decl = build_decl(UNKNOWN_LOCATION, + VAR_DECL, + var_name, + type_decl); + } + } + else + { + // We were not provided a name, so we have to create one. + if( vs_scope == vs_static ) + { + // static variables have to have names: + static int counter = 1; + char ach[32]; + sprintf(ach, "__unnamed_static_variable_%d", counter++); + var_name = get_identifier(ach); + } + var_decl = build_decl(UNKNOWN_LOCATION, + VAR_DECL, + var_name, + type_decl); + } + switch(vs_scope) + { + case vs_stack: + // This is a stack variable + DECL_CONTEXT(var_decl) = current_function->function_decl; + break; + case vs_static: + // This is a function-level static variable + DECL_CONTEXT(var_decl) = current_function->function_decl; + TREE_STATIC(var_decl) = 1; + break; + case vs_file_static: + // File static variables have translation_unit_scope. I have chosen to + // provide access to them through a map; see gg_trans_unit_var_decl(); + // TREE_STATIC seems to imply const. + DECL_CONTEXT (var_decl) = gg_trans_unit.trans_unit_decl; + TREE_STATIC(var_decl) = 1; + break; + case vs_file: + // File variables have translation_unit_scope. + // When TREE_STATIC is on, they seem to get put into the .text section + DECL_CONTEXT (var_decl) = gg_trans_unit.trans_unit_decl; + break; + case vs_external: + // This is for defining variables with global scope + DECL_CONTEXT (var_decl) = gg_trans_unit.trans_unit_decl; + TREE_USED(var_decl) = 1; + TREE_STATIC(var_decl) = 1; + TREE_PUBLIC(var_decl) = 1; + seen[unique_name] = var_decl; + break; + case vs_external_reference: + // This is for referencing variables defined elsewhere + // TODO: Figure out why this is working. For accessing "stderr", it + // doesn't matter if TREE_PUBLIC is on, but TREE_STATIC has to be on. This + // does *not* match what is seen when compiling a C program that accesses + // "stderr". + DECL_CONTEXT (var_decl) = gg_trans_unit.trans_unit_decl; + TREE_USED(var_decl) = 1; + TREE_STATIC(var_decl) = 1; + TREE_PUBLIC(var_decl) = 1; + break; + } + DECL_INITIAL(var_decl) = initial_value; + if( unique_name ) + { + free(unique_name); + } + return var_decl; + } + +tree +gg_define_from_declaration(tree var_decl) + { + // Append the var_decl to either the chain for the current function or for + // the translation_unit, depending on the var_decl's context: + gg_append_var_decl(var_decl); + + if( !SCOPE_FILE_SCOPE_P(DECL_CONTEXT(var_decl)) ) + { + // Having made sure the chain of variable declarations is nicely started, + // it's time to actually define the storage with a decl_expression: + tree stmt = build1_loc (location_from_lineno(), + DECL_EXPR, + TREE_TYPE(var_decl), + var_decl); + gg_append_statement(stmt); + } + + // And we are done. That variable is now available for computation. + return var_decl; + } + +tree +gg_define_variable(tree type_decl) + { + tree var_decl = gg_declare_variable(type_decl); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_variable(tree type_decl, tree initial_value) + { + tree var_decl = gg_declare_variable(type_decl, + NULL, + gg_cast(type_decl, initial_value), + vs_stack); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_variable(tree type_decl, gg_variable_scope_t vs_scope) + { + tree var_decl = gg_declare_variable(type_decl, NULL, NULL_TREE, vs_scope); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_variable( tree type_decl, + const char *var_name, + gg_variable_scope_t vs_scope, + tree initial_value) + { + tree var_decl = gg_declare_variable(type_decl, var_name, initial_value, vs_scope); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_variable(tree type_decl, const char *name, gg_variable_scope_t vs_scope) + { + bool already_defined = false; + tree var_decl = gg_declare_variable(type_decl, name, NULL_TREE, vs_scope, &already_defined); + if( !already_defined ) + { + gg_define_from_declaration(var_decl); + } + return var_decl; + } + +tree +gg_define_bool() + { + tree var_decl = gg_declare_variable(BOOL); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_char() + { + // The nearest C equivalent: "char name;", but this one is given a + // compiler-assigned name. + // Beware: This is the "implementation specific" version of char, which + // in GENERIC seems to be signed on Windows/Linux Intel machines. But we + // need to be careful if we use an 8-bit type for numerical calculation. + tree var_decl = gg_declare_variable(CHAR); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_char(const char *variable_name) + { + // The C equivalent: "char name;" + // Beware: This is the "implementation specific" version of char, which + // in GENERIC seems to be signed on Windows/Linux Intel machines. But we + // need to be careful if we use an 8-bit type for numerical calculation. + tree var_decl = gg_declare_variable(CHAR, variable_name); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_char(const char *variable_name, tree ch) + { + tree var_decl = gg_declare_variable(CHAR, variable_name, ch); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_char(const char *variable_name, int ch) + { + return gg_define_char(variable_name, char_nodes[ch&0xFF]); + } + +tree +gg_define_uchar() + { + // The C equivalent: "char name;" + // Beware: This is the "implementation specific" version of char, which + // in GENERIC seems to be signed on Windows/Linux Intel machines. But we + // need to be careful if we use an 8-bit type for numerical calculation. + return gg_define_variable(UCHAR); + } + +tree +gg_define_uchar(const char *variable_name) + { + // The C equivalent: "char name;" + // Beware: This is the "implementation specific" version of char, which + // in GENERIC seems to be signed on Windows/Linux Intel machines. But we + // need to be careful if we use an 8-bit type for numerical calculation. + return gg_define_variable(UCHAR, variable_name); + } + +tree +gg_define_uchar(const char *variable_name, tree ch) + { + tree var_decl = gg_declare_variable(UCHAR, variable_name, ch); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_uchar(const char *variable_name, int ch) + { + return gg_define_char(variable_name, char_nodes[ch&0xFF]); + } + +tree +gg_define_int() + { + tree var_decl = gg_declare_variable(INT); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_int(int N) + { + tree var_decl = gg_declare_variable(INT, NULL, build_int_cst_type(INT, N)); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_int(const char *variable_name) + { + tree var_decl = gg_declare_variable(INT, variable_name); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_int(const char *variable_name, tree N) + { + tree var_decl = gg_declare_variable(INT, variable_name, N); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_int(const char *variable_name, int N) + { + tree var_decl = gg_declare_variable(INT, variable_name, build_int_cst_type(INT, N)); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_size_t() + { + tree var_decl = gg_declare_variable(SIZE_T); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_size_t(const char *variable_name) + { + tree var_decl = gg_declare_variable(SIZE_T, variable_name); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_size_t(tree N) + { + tree retval = gg_define_variable(SIZE_T); + gg_assign(retval, N); + return retval; + } + +tree +gg_define_size_t(size_t N) + { + tree var_decl = gg_declare_variable(SIZE_T, NULL, build_int_cst_type(SIZE_T, N)); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_size_t(const char *variable_name, tree N) + { + tree var_decl = gg_declare_variable(SIZE_T, variable_name, N); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_size_t(const char *variable_name, size_t N) + { + tree var_decl = gg_declare_variable(SIZE_T, variable_name, build_int_cst_type(SIZE_T, N)); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_int128() + { + // The C equivalent: "INT128 ;" + return gg_define_variable(INT128); + } + +tree +gg_define_int128(const char *variable_name) + { + // The C equivalent: "INT128 name;" + return gg_define_variable(INT128, variable_name); + } + +tree +gg_define_int128(const char *variable_name, tree N) + { + // The C equivalent: "INT128 name = N" + tree var_decl = gg_declare_variable(INT128, variable_name, N); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_int128(const char *variable_name, int N) + { + // The C equivalent: "INT128 name = N" + tree var_decl = gg_define_int128(variable_name, build_int_cst_type(INT128, N)); + return var_decl; + } + +tree +gg_define_char_star() + { + // The C equivalent: "char *name;" + return gg_define_variable(CHAR_P); + } + +tree +gg_define_char_star(const char *variable_name) + { + return gg_define_variable(CHAR_P, variable_name); + } + +tree +gg_define_char_star(const char *variable_name, gg_variable_scope_t scope) + { + tree var_decl = gg_declare_variable(CHAR_P, variable_name, NULL_TREE, scope); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_char_star(tree var) + { + tree var_decl = gg_declare_variable(CHAR_P, NULL, var); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_char_star(const char *variable_name, tree var) + { + tree var_decl = gg_declare_variable(CHAR_P, variable_name, var); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_uchar_star() + { + tree var_decl = gg_declare_variable(UCHAR_P); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_uchar_star(const char *variable_name) + { + tree var_decl = gg_declare_variable(UCHAR_P, variable_name); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_uchar_star(const char *variable_name, gg_variable_scope_t scope) + { + tree var_decl = gg_declare_variable(UCHAR_P, variable_name, NULL_TREE, scope); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_uchar_star(tree var) + { + tree var_decl = gg_declare_variable(UCHAR_P, NULL, var); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_uchar_star(const char *variable_name, tree var) + { + tree var_decl = gg_declare_variable(UCHAR_P, variable_name, var); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_void_star() + { + tree var_decl = gg_declare_variable(VOID_P); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_void_star(const char *variable_name) + { + tree var_decl = gg_declare_variable(VOID_P, variable_name); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_void_star(const char *variable_name, tree var) + { + tree var_decl = gg_declare_variable(VOID_P, variable_name, var); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_void_star(const char *variable_name, gg_variable_scope_t scope) + { + tree var_decl = gg_declare_variable(VOID_P, variable_name, NULL_TREE, scope); + gg_define_from_declaration(var_decl); + return var_decl; + } + +tree +gg_define_longdouble() + { + tree var_decl = gg_declare_variable(LONGDOUBLE); + gg_define_from_declaration(var_decl); + return var_decl; + } + +extern tree +gg_define_array(tree type_decl, size_t size) + { + tree array_type = build_array_type_nelts(type_decl, size); + return gg_define_variable(array_type); + } + +extern tree +gg_define_array(tree type_decl, const char *name, size_t size) + { + tree array_type = build_array_type_nelts(type_decl, size); + return gg_define_variable(array_type, name); + } + +extern tree +gg_define_array(tree type_decl, size_t size, gg_variable_scope_t scope) + { + tree array_type = build_array_type_nelts(type_decl, size); + return gg_define_variable(array_type, scope); + } + +extern tree +gg_define_array(tree type_decl, const char *name, size_t size, gg_variable_scope_t scope) + { + tree array_type = build_array_type_nelts(type_decl, size); + return gg_define_variable(array_type, name, scope); + } + +tree +gg_get_address_of(const tree var_decl) + { + // Returns an ADDR_EXPR which points to var_decl. + // The C equivalent is &variable + // We need to be able to use this guy's address directly: + + // In order to do that, this fellow's "addressable" bit has to be on, otherwise + // the GIMPLE reducer creates a temporary variable, sets its value to var_decl's, + // and returns the pointer to the temp. I suppose this has something to do with + // pass by reference and pass by value, but it makes my head hurt, and, frankly, + // I'll take the dangerous road. + + TREE_ADDRESSABLE(var_decl) = 1; + TREE_USED(var_decl) = 1; + return build1( ADDR_EXPR, + build_pointer_type (TREE_TYPE(var_decl)), + var_decl); + } + +tree +gg_get_indirect_reference(tree pointer, tree offset) + { + // The C equivalent: auto pointer[offset]; + + // the returned indirect reference has the same type as + // what pointer points to. If pointer is a char *, then the returned + // value has type char. If pointer is an int *, then the returned + // value has type int. + + // We also want the offset to operate the same way it does in C, so we + // are going to find the size of the objects the pointer points to, and + // multiply the offset by that size: + + tree pointer_type = TREE_TYPE(pointer); + tree element_type = TREE_TYPE(pointer_type); + + tree indirect_reference; + if( offset ) + { + // We can now start building our little shrub: + tree distance = build2( MULT_EXPR, + SIZE_T, + gg_cast(sizetype, offset), + TYPE_SIZE_UNIT(element_type)); + + // Next, we build the pointer_plus_expr: + tree pointer_plus_expr = build2(POINTER_PLUS_EXPR, + pointer_type, + pointer, + distance); + + // With that in hand, we can build the indirect_reference: + indirect_reference = build1(INDIRECT_REF, element_type, pointer_plus_expr); + } + else + { + indirect_reference = build1(INDIRECT_REF, element_type, pointer); + } + + return indirect_reference; + } + +tree +gg_indirect(tree pointer, tree byte_offset) + { + // Unlike gg_get_indirect_reference, which multiplies the offset by the + // size of the type pointed to by pointer, this routine simply adds the offset + // to the pointer. + tree pointer_type = TREE_TYPE(pointer); + tree element_type = TREE_TYPE(pointer_type); + + tree retval; + if( byte_offset == NULL_TREE ) + { + retval = build1(INDIRECT_REF, element_type, pointer); + } + else + { + tree pointer_plus_expr = build2(POINTER_PLUS_EXPR, + pointer_type, + pointer, + gg_cast(SIZE_T, byte_offset)); + retval = build1(INDIRECT_REF, element_type, pointer_plus_expr); + } + + return retval; + } + +tree +gg_array_value(tree pointer, tree offset) + { + // We arrange the function so that it can work on either an ARRAY_TYPE + // or a pointer type + tree pointer_type = TREE_TYPE(pointer); + tree element_type = TREE_TYPE(pointer_type); + if(POINTER_TYPE_P(pointer_type)) + { + // It is a pointer + tree retval = gg_get_indirect_reference(pointer, offset); + return retval; + } + else + { + return build4(ARRAY_REF, + element_type, + pointer, + offset, + NULL_TREE, + NULL_TREE); + } + } + +tree +gg_array_value(tree pointer, int N) + { + return gg_array_value(pointer, build_int_cst(INT, N)); + } + +void +gg_increment(tree var) + { + tree var_type = TREE_TYPE(var); + gg_assign(var, gg_add(var, build_int_cst_type(var_type, 1))); + } + +void +gg_decrement(tree var) + { + tree var_type = TREE_TYPE(var); + gg_assign(var, + gg_cast(var_type, + gg_subtract(var, + build_int_cst_type(var_type, 1)))); + } + +tree +gg_negate(tree var) + { + return build1(NEGATE_EXPR, TREE_TYPE(var), var); + } + +tree +gg_bitwise_not(tree var) + { + return build1(BIT_NOT_EXPR, TREE_TYPE(var), var); + } + +tree +gg_abs(tree var) + { + return build1(ABS_EXPR, TREE_TYPE(var), var); + } + +static tree +gg_get_larger_type(tree A, tree B) + { + tree larger = TREE_TYPE(B); + if( TREE_INT_CST_LOW(TYPE_SIZE(TREE_TYPE(A))) + > TREE_INT_CST_LOW(TYPE_SIZE(TREE_TYPE(B))) ) + { + larger = TREE_TYPE(A); + } + return larger; + } + +tree +gg_add(tree addend1, tree addend2) + { + tree retval; + if( POINTER_TYPE_P(TREE_TYPE(addend1)) ) + { + // operand1 is a pointer. + // Make this work like C pointer arithmetic. We'll find the + // size of the things that pointer points to, and multiply accordingly + tree pointer_type = TREE_TYPE(addend1); + tree pointer_type_type = TREE_TYPE(pointer_type); + tree bytes_per_element = TYPE_SIZE_UNIT(pointer_type_type); + + tree op2 = gg_cast(SIZE_T, gg_multiply(addend2, bytes_per_element)); + retval = build2(POINTER_PLUS_EXPR, + TREE_TYPE(addend1), + addend1, + op2); + } + else + { + // Ordinary addition. Scale both operands to match the larger + // type of the two operands. + tree larger_type = gg_get_larger_type(addend1, addend2); + retval = build2( PLUS_EXPR, + larger_type, + gg_cast(larger_type, addend1), + gg_cast(larger_type, addend2)); + } + return retval; + } + +tree +gg_subtract(tree A, tree B) + { + // We are doing A - B, instead. + + if( POINTER_TYPE_P(TREE_TYPE(A)) && INTEGRAL_TYPE_P(TREE_TYPE(B)) ) + { + // We are subtracting an integer from a pointer. That's handled + // in gg_add, by converting the integer, possibly signed, to + // an unsigned huge number. + return gg_add(A, gg_negate(B)); + } + + if( POINTER_TYPE_P(TREE_TYPE(A)) && POINTER_TYPE_P(TREE_TYPE(A)) ) + { + // We are subtracting two pointers, yielding a signed size_t + return build2(POINTER_DIFF_EXPR, SSIZE_T, A, B); + } + + // This is an ordinary subtraction. Scale everything to the larger_type + // of the two operands. + tree larger_type = gg_get_larger_type(A, B); + tree stmt = build2( MINUS_EXPR, + larger_type, + gg_cast(larger_type, A), + gg_cast(larger_type, B) ); + return stmt; + } + +tree +gg_multiply(tree A, tree B) + { + // We will return the product of A and B, adjusting to + // whichever is larger: + tree larger_type = gg_get_larger_type(A, B); + return build2( MULT_EXPR, larger_type, gg_cast(larger_type, A), gg_cast(larger_type, B) ); + } + +tree +gg_real_divide(tree A, tree B) + { + // This floating point division: + tree larger_type = gg_get_larger_type(A, B); + return build2( RDIV_EXPR, larger_type, gg_cast(larger_type,A), gg_cast(larger_type,B)); + } + +tree +gg_divide(tree A, tree B) + { + // This is the equivalent of C integer divide + tree larger_type = gg_get_larger_type(A, B); + return build2( TRUNC_DIV_EXPR, larger_type, gg_cast(larger_type,A), gg_cast(larger_type,B)); + } + +tree +gg_mod(tree A, tree B) + { + // This is the equivalent of C A % B + tree larger_type = gg_get_larger_type(A, B); + return build2( TRUNC_MOD_EXPR, larger_type, gg_cast(larger_type,A), gg_cast(larger_type,B)); + } + +tree +gg_lshift(tree A, tree B) + { + // Equivalent of A << B; + return build2( LSHIFT_EXPR, TREE_TYPE(A), A, B ); + } + +tree +gg_rshift(tree A, tree B) + { + // Equivalent of A >> B; + return build2( RSHIFT_EXPR, TREE_TYPE(A), A, B ); + } + +tree +gg_bitwise_or(tree A, tree B) + { + // This is C equivalent to A | B + tree larger_type = gg_get_larger_type(A, B); + return build2( BIT_IOR_EXPR, larger_type, gg_cast(larger_type,A), gg_cast(larger_type,B)); + } + +tree +gg_bitwise_xor(tree A, tree B) + { + // This is C equivalent to A ^ B + tree larger_type = gg_get_larger_type(A, B); + return build2( BIT_XOR_EXPR, larger_type, gg_cast(larger_type,A), gg_cast(larger_type,B)); + } + +tree +gg_bitwise_and(tree A, tree B) + { + // This is C equivalent to A & B + tree larger_type = gg_get_larger_type(A, B); + return build2( BIT_AND_EXPR, larger_type, gg_cast(larger_type,A), gg_cast(larger_type,B)); + } + +tree +gg_build_relational_expression(tree operand_a, + enum relop_t op, + tree operand_b) + { + tree_code compare = EQ_EXPR; // Assuage the compiler + switch(op) + { + case eq_op: + compare = EQ_EXPR; + break; + case ne_op: + compare = NE_EXPR; + break; + case lt_op: + compare = LT_EXPR; + break; + case gt_op: + compare = GT_EXPR; + break; + case ge_op: + compare = GE_EXPR; + break; + case le_op: + compare = LE_EXPR; + break; + } + tree relational_expression = build2_loc(location_from_lineno(), + compare, + boolean_type_node, + operand_a, + operand_b); + return relational_expression; + } + +tree +gg_build_logical_expression(tree operand_a, + enum logop_t op, + tree operand_b) + { + tree logical_expression = NULL_TREE; + tree_code logical_op; + switch(op) + { + case and_op: + logical_op = TRUTH_ANDIF_EXPR; + logical_expression = build2(logical_op, + boolean_type_node, + operand_a, + operand_b); + break; + + case or_op: + logical_op = TRUTH_ORIF_EXPR; + logical_expression = build2(logical_op, + boolean_type_node, + operand_a, + operand_b); + break; + + case not_op: + logical_op = TRUTH_NOT_EXPR; + logical_expression = build1(logical_op, + boolean_type_node, + operand_b); + break; + + case xor_op: + logical_op = TRUTH_XOR_EXPR; + logical_expression = build2(logical_op, + boolean_type_node, + operand_a, + operand_b); + break; + + case xnor_op: + case true_op: + case false_op: + // This is handled elsewhere + break; + } + return logical_expression; + } + +void +gg_create_goto_pair(tree *goto_expr, tree *label_expr, tree *label_addr, const char *name) + { + // We are going to create a pair of expressions for our + // caller. They are a matched set of goto/label expressions, + // to be included in a statement list + tree label_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + gg_create_assembler_name(name), + void_type_node); + DECL_CONTEXT(label_decl) = current_function->function_decl; + TREE_USED(label_decl) = 1; + + *goto_expr = build1(GOTO_EXPR, void_type_node, label_decl); + *label_expr = build1(LABEL_EXPR, void_type_node, label_decl); + *label_addr = gg_get_address_of(label_decl); + } + +void +gg_create_goto_pair(tree *goto_expr, tree *label_expr, tree *label_addr) + { + // We are going to create a pair of expressions for our + // caller. They are a matched set of goto/label expressions, + // to be included in a statement list + tree label_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + NULL_TREE, + void_type_node); + DECL_CONTEXT(label_decl) = current_function->function_decl; + TREE_USED(label_decl) = 1; + + *goto_expr = build1(GOTO_EXPR, void_type_node, label_decl); + *label_expr = build1(LABEL_EXPR, void_type_node, label_decl); + *label_addr = gg_get_address_of(label_decl); + } + +void +gg_create_goto_pair(tree *goto_expr, + tree *label_expr, + tree *label_addr, + tree *label_decl) + { + // We are going to create a pair of expressions for our + // caller. They are a matched set of goto/label expressions, + // to be included in a statement list + *label_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + NULL_TREE, + void_type_node); + DECL_CONTEXT(*label_decl) = current_function->function_decl; + TREE_USED(*label_decl) = 1; + + *goto_expr = build1(GOTO_EXPR, void_type_node, *label_decl); + *label_expr = build1(LABEL_EXPR, void_type_node, *label_decl); + *label_addr = gg_get_address_of(*label_decl); + } + +void +gg_goto_label_decl(tree label_decl) + { + tree goto_expr = build1_loc( location_from_lineno(), + GOTO_EXPR, + void_type_node, + label_decl); + gg_append_statement(goto_expr); + } + +void +gg_create_goto_pair(tree *goto_expr, tree *label_expr) + { + // We are going to create a pair of expressions for our + // caller. They are a matched set of goto/label expressions, + // to be included in a statement list + tree label_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + NULL_TREE, + void_type_node); + DECL_CONTEXT(label_decl) = current_function->function_decl; + TREE_USED(label_decl) = 1; + + *goto_expr = build1(GOTO_EXPR, void_type_node, label_decl); + *label_expr = build1(LABEL_EXPR, void_type_node, label_decl); + } + +void +gg_create_goto_pair(tree *goto_expr, tree *label_expr, const char *name) + { + // We are going to create a pair of named expressions for our + // caller. They are a matched set of goto/label expressions, + // to be included in a statement list + tree label_decl = build_decl( UNKNOWN_LOCATION, + LABEL_DECL, + gg_create_assembler_name(name), + void_type_node); + DECL_CONTEXT(label_decl) = current_function->function_decl; + TREE_USED(label_decl) = 1; + + *goto_expr = build1(GOTO_EXPR, void_type_node, label_decl); + *label_expr = build1(LABEL_EXPR, void_type_node, label_decl); + } + +// Used for implementing SECTIONS and PARAGRAPHS. When you have a +// void *pointer = &&label, gg_goto is the same as +// goto *pointer +void +gg_goto(tree var_decl_pointer) + { + tree go_to = build1_loc(location_from_lineno(), + GOTO_EXPR, + void_type_node, + var_decl_pointer); + gg_append_statement(go_to); + } + +void +gg_while( tree operand_a, + enum relop_t op, + tree operand_b) + { + /* + See demonstration_while_if for the canonical demonstration + + You use it like this: + + WHILE + .... + WEND + + We do the C construct: + + while( a OP b ) + { + + } + + like this: + + goto test + top: + + test: + if( a OP b) + goto top + else + goto leave: + leave: + + */ + + tree goto_top; + tree label_top; + + tree goto_test; + tree label_test; + + tree goto_leave; + tree label_leave; + + gg_create_goto_pair(&goto_top, &label_top); + gg_create_goto_pair(&goto_test, &label_test); + gg_create_goto_pair(&goto_leave, &label_leave); + + tree statement_block = make_node(STATEMENT_LIST); + TREE_TYPE(statement_block) = void_type_node; + + // During development, I tried appending a statement_list to a statement_list, + // intending it to be collected together that way. But it was too smart for me; + // it just unwound the second list and tacked it onto the end of the first. + + // So I used a BIND_EXPR to collect them together. This isn't a new context, so I don't + // point operand[0] at a string of vars, nor operand[2] at a block. + tree bind_expr = build3( BIND_EXPR, + void_type_node, + NULL_TREE, + statement_block, + NULL_TREE); + + // With the pairs created and the bind_expr sorted out, we can now put + // together our while construction: + + gg_append_statement(goto_test); + gg_append_statement(label_top); + gg_append_statement(bind_expr); + gg_append_statement(label_test); + IF( operand_a, op, operand_b ) + gg_append_statement(goto_top); + ELSE + gg_append_statement(goto_leave); + ENDIF + gg_append_statement(label_leave); + + // And here's the statement_list for the programmer to fill + // and end with a WEND + current_function->statement_list_stack.push_back(statement_block); + } + +void +gg_create_true_false_statement_lists(tree relational_expression) + { + // Create the two statement_lists for ifness, one for true and + // the other for false. Put them on the stack, ready for the first + // pop on ELSE and the second pop on ENDIF: + + tree if_true_statement_list = make_node(STATEMENT_LIST); + TREE_TYPE(if_true_statement_list) = void_type_node; + tree if_false_statement_list = make_node(STATEMENT_LIST); + TREE_TYPE(if_false_statement_list) = void_type_node; + + tree conditional = build3( COND_EXPR, + boolean_type_node, + relational_expression, + if_true_statement_list, + if_false_statement_list); + + // We need to put our conditional onto the current_stack: + gg_append_statement(conditional); + + // And with that done, we can push the FALSE and TRUE blocks + // onto the stack in the correct order: + current_function->statement_list_stack.push_back(if_false_statement_list); + current_function->statement_list_stack.push_back(if_true_statement_list); + } + +void +gg_if( tree operand_a, + enum relop_t op, + tree operand_b) + { + /* Listen up, troops. Here's how you use this constructor. + + You use it like this: + + IF( this, LT, that) + .... + ELSE + .... + ENDIF + + You *must* have all three: IF ELSE ENDIF, if you don't, the + current_function->statement_list_stack gets all higgledepiggledy + + It is the C equivalent of + + if( a OP b ) + { + + } + else + { + + } + + This routine pushes the false_statement_list onto current_function->statement_list_stack, + followed by the true_statement_list. + + You then generate statements for the TRUE block + You then pop the current_function->statement_list_stack. + Then you do the same for the FALSE block + You then pop the current_function->statement_list_stack again. + + For the sake of readability, we define ELSE and ENDIF to do + that popping. + + I don't plan on explaining this everywhere it's used. + + See demonstration_while_if for the canonical demonstration + */ + + if( TREE_TYPE(operand_a) != TREE_TYPE(operand_b) ) + { + fprintf(stderr, "%s(): a and b have different TREE_TYPES\n", __func__); + gcc_unreachable(); + } + + // Build the relational expression: + tree relational_expression = + gg_build_relational_expression(operand_a, + op, + operand_b); + + // And with that in hand, create the two statement lists, one for + // true and one for false, and set up the stacks: + gg_create_true_false_statement_lists(relational_expression); + } + +tree +gg_get_function_address(tree return_type, const char *funcname) + { + // This routine finds a function by name. It calls build_fn_decl + // with an empty array of varargs. I haven't investigated all the + // possibilities, but this returns an address expression for a function + // that can be built with any argument[s]. + + // There is no compile-time checking; if you specify disaster, then + // disaster will be what you get. + tree fndecl_type = build_varargs_function_type_array (return_type, + 0, + NULL); + tree function_decl = build_fn_decl (funcname, fndecl_type); + DECL_EXTERNAL (function_decl) = 1; + + tree retval = build1(ADDR_EXPR, build_pointer_type (fndecl_type), function_decl); + + return retval; + } + +void +gg_printf(const char *format_string, ...) + { + // This allows you to use fprintf(stderr, ...) with a format string + // and a list of arguments ending with a NULL + + // Use this for conveniently adding print statements into the generated + // code, for run-time print-statement debugging. gg_write is used for + // actual program code. + + // Note that the return value from the printf() call is *not* available + // to the caller. + + int nargs = 0; + tree args[ARG_LIMIT]; + + // Because this routine is intended for debugging, we are sending the + // text to STDERR + + // Because we don't actually use stderr ourselves, we just pick it up as a + // VOID_P and pass it along to fprintf() + tree t_stderr = gg_declare_variable(VOID_P, "stderr", + NULL_TREE, + vs_external_reference); + + gg_push_context(); + + args[nargs++] = t_stderr; + args[nargs++] = build_string_literal(strlen(format_string)+1, format_string); + + va_list ap; + va_start(ap, format_string); + tree arg = va_arg(ap, tree); + while(arg) + { + if(nargs >= ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### You *must* be joking!"); + gcc_unreachable(); + } + + if( TREE_CODE(arg) >= NUM_TREE_CODES) + { + // Warning: This test is not completely reliable, because a garbage + // byte could have a valid TREE_CODE. But it does help. + yywarn("You nitwit!"); + yywarn("You forgot to put a NULL_TREE at the end of a " + "gg_printf() again!"); + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + gcc_unreachable(); + } + + args[nargs++] = arg; + arg = va_arg(ap, tree); + } + va_end (ap); + + static tree function = NULL_TREE; + if( !function ) + { + function = gg_get_function_address(INT, "fprintf"); + } + + tree stmt = build_call_array_loc (location_from_lineno(), + INT, + function, + nargs, + args); + gg_append_statement(stmt); + + gg_pop_context(); + } + +tree +gg_fprintf(tree fd, int nargs, const char *format_string, ...) + { + tree retval = gg_define_int(); + gg_push_context(); + tree buffer = gg_define_char_star(); + gg_assign(buffer, gg_cast(CHAR_P, gg_malloc(1024))); + + tree args[ARG_LIMIT]; + + // Set up a call to sprintf: + int argc = 0; + args[argc++] = buffer; + args[argc++] = build_string_literal(strlen(format_string)+1, format_string); + + va_list ap; + va_start(ap, format_string); + tree arg = va_arg(ap, tree); + int narg = 0; + while(narg++ < nargs) + { + if(argc >= ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### You *must* be joking!"); + gcc_unreachable(); + } + + args[argc++] = arg; + arg = va_arg(ap, tree); + } + va_end (ap); + + static tree function = NULL_TREE; + + if( !function ) + { + function = gg_get_function_address(INT, "sprintf"); + } + + tree stmt = build_call_array_loc (location_from_lineno(), + INT, + function, + argc, + args); + gg_assign(retval, stmt); + gg_write(fd, buffer, gg_strlen(buffer)); + + gg_free(buffer); + gg_pop_context(); + return retval; + } + +tree +gg_read(tree fd, tree buf, tree count) + { + // The C equivalent: "read(fd, buf, count)" + + // Because the caller might need the ssize_t return value, this routine + // returns the statement_decl for the call. It is used this way: + + // tree num_chars = gg_define_int("_num_chars"); + // gg_assign(num_chars, gg_read(fd, buf, count)); + + return gg_call_expr(SSIZE_T, + "read", + fd, + buf, + count, + NULL_TREE); + } + +void +gg_write(tree fd, tree buf, tree count) + { + gg_call(SSIZE_T, + "write", + fd, + buf, + count, + NULL_TREE); + } + +void +gg_memset(tree dest, const tree value, tree size) + { + tree the_call = + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_MEMSET), + 3, + dest, + value, + size); + gg_append_statement(the_call); + } + +tree +gg_memchr(tree buf, tree ch, tree length) + { + tree the_call = fold_convert( + pvoid_type_node, + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_MEMCHR), + 3, + buf, + ch, + length)); + return the_call; + } + +/* Built-in call to memcpy() */ + +void +gg_memcpy(tree dest, const tree src, tree size) + { + tree the_call = build_call_expr_loc( + location_from_lineno(), + builtin_decl_explicit (BUILT_IN_MEMCPY), + 3, + dest, + src, + size); + gg_append_statement(the_call); + } + +/* Built-in call to memmove() */ + +void +gg_memmove(tree dest, const tree src, tree size) + { + tree the_call = build_call_expr_loc( + location_from_lineno(), + builtin_decl_explicit (BUILT_IN_MEMMOVE), + 3, + dest, + src, + size); + gg_append_statement(the_call); + } + +tree +gg_memdup(tree data, tree length) + { + // Duplicates data; gg_free should eventually be called + tree retval = gg_define_char_star(); + gg_assign(retval, gg_malloc(length)); + gg_memcpy(retval, data, length); + return retval; + } + +tree +gg_memdup(tree data, size_t length) + { + // Duplicates data; gg_free should eventually be called + tree retval = gg_define_char_star(); + gg_assign(retval, gg_malloc(length)); + gg_memcpy(retval, data, build_int_cst_type(SIZE_T, length)); + return retval; + } + +void +gg_strcpy(tree dest, tree src) + { + tree the_call = + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_STRCPY), + 2, + dest, + src); + gg_append_statement(the_call); + } + +tree +gg_strcmp(tree A, tree B) + { + tree the_call = fold_convert( + integer_type_node, + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_STRCMP), + 2, + A, + B)); + return the_call; + } + +tree +gg_open(tree char_star_A, tree int_B) + { + return gg_call_expr(INT, + "open", + char_star_A, + int_B, + NULL_TREE); + } + +tree +gg_close(tree int_A) + { + return gg_call_expr(INT, + "close", + int_A, + NULL_TREE); + } + +tree +gg_strncmp(tree char_star_A, tree char_star_B, tree size_t_N) + { + tree the_call = fold_convert( + integer_type_node, + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_STRNCMP), + 3, + char_star_A, + char_star_B, + size_t_N)); + return the_call; + } + +void +gg_return(tree operand) + { + tree stmt; + + if( !gg_trans_unit.function_stack.size() ) + { + // I put this in to cope with the problem of two END PROGRAM statements, which + // should be a syntax error but, as of 2021-02-24, is ignored by GnuCOBOL and + // by our parser. + return ; + } + + // We have to pop ourselves off of the module_name_stack: + gg_call(VOID, + "__gg__module_name_pop", + NULL_TREE); + + if( !operand || !DECL_RESULT(current_function->function_decl) ) + { + // When there is no operand, or if the function result is void, then + // we just generate a return_expr. + stmt = build1_loc(location_from_lineno(), RETURN_EXPR, void_type_node, NULL_TREE); + } + else + { + // Life is a wee bit more complicated, because we want to return the operand + tree function_type = TREE_TYPE(DECL_RESULT(current_function->function_decl)); + tree modify = build2( MODIFY_EXPR, + function_type, + DECL_RESULT(current_function->function_decl), + gg_cast(function_type, operand)); + stmt = build1_loc(location_from_lineno(), RETURN_EXPR, void_type_node, modify); + } + gg_append_statement(stmt); + } + +void +chain_parameter_to_function(tree function_decl, const tree param_type, const char *name) + { + tree parm = build_decl (location_from_lineno(), + PARM_DECL, + get_identifier (name), + param_type); + DECL_CONTEXT(parm) = function_decl; + TREE_USED(parm) = 1; + DECL_INITIAL(parm) = param_type; + + if( DECL_ARGUMENTS(function_decl) ) + { + chainon(DECL_ARGUMENTS(function_decl),parm); + } + else + { + DECL_ARGUMENTS(function_decl) = parm; + } + } + +void +gg_modify_function_type(tree function_decl, tree return_type) + { + tree fndecl_type = build_varargs_function_type_array( return_type, + 0, // No parameters yet + NULL); // And, hence, no types + TREE_TYPE(function_decl) = fndecl_type; + tree resdecl = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, return_type); + DECL_CONTEXT (resdecl) = function_decl; + DECL_RESULT (function_decl) = resdecl; + } + +tree +gg_define_function_with_no_parameters(tree return_type, + const char *funcname, + const char *unmangled_name) + { + // This routine builds a function_decl, puts it on the stack, and + // gives it a context. + + // At this time we don't know how many parameters this function expects, so + // we set things up and we'll tack on the parameters later. + + // Create the FUNCTION_TYPE for that array: + // int nparams = 1; + // tree types[1] = {VOID_P}; + // const char *names[1] = {"_p1"}; + + // tree fndecl_type = build_varargs_function_type_array( return_type, + // nparams, + // types); + + tree fndecl_type = build_varargs_function_type_array( return_type, + 0, // No parameters yet + NULL); // And, hence, no types + + // Create the FUNCTION_DECL for that FUNCTION_TYPE + tree function_decl = build_fn_decl (funcname, fndecl_type); + + // Some of this stuff is magical, and is based on compiling C programs + // and just mimicking the results. + TREE_ADDRESSABLE(function_decl) = 1; + TREE_STATIC(function_decl) = 1; + DECL_EXTERNAL (function_decl) = 0; + DECL_PRESERVE_P (function_decl) = 0; + DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT(function_decl) = 1; + DECL_ARTIFICIAL(function_decl) = 0; + TREE_NOTHROW(function_decl) = 0; + TREE_USED(function_decl) = 1; + + // This code makes COBOL nested programs actual visible on the + // source code "trans_unit_decl" level, but with non-public "static" + // visibility. + if( gg_trans_unit.function_stack.size() == 0 ) + { + // gg_trans_unit.function_stack is empty, so our context is + // the compilation module, and we need to be public: + DECL_CONTEXT (function_decl) = gg_trans_unit.trans_unit_decl; + TREE_PUBLIC(function_decl) = 1; + } + else + { + // The stack has something in it, so we are building a nested function. + // Make the current function our context + DECL_CONTEXT (function_decl) = gg_trans_unit.trans_unit_decl; + TREE_PUBLIC(function_decl) = 0; + + // Append this function to the list of functions and variables + // associated with the computation module. + gg_append_var_decl(function_decl); + } + + // Establish the RESULT_DECL for the function: + tree resdecl = build_decl (location_from_lineno(), RESULT_DECL, NULL_TREE, return_type); + DECL_CONTEXT (resdecl) = function_decl; + DECL_RESULT (function_decl) = resdecl; + + // The function_decl has a .function member, a pointer to struct_function. + // This is quietly, almost invisibly, extremely important. You need to + // call this routine after DECL_RESULT has been established: + + allocate_struct_function(function_decl, false); + + struct gg_function_t new_function = {}; + new_function.context_count = 0; + new_function.function_decl = function_decl; + new_function.our_name = IDENTIFIER_POINTER(DECL_NAME(function_decl)); + new_function.our_unmangled_name = xstrdup(unmangled_name); + new_function.function_address = gg_get_function_address(VOID, new_function.our_name); + + // Each program on the stack gets a unique identifier. This is used, for + // example, to make sure that static variables have unique names. + static size_t program_id = 0; + new_function.program_id_number = program_id++; + + // With everything established, put this function_decl on the stack + gg_trans_unit.function_stack.push_back(new_function); + + // All we need is a context, and we are ready to go: + gg_push_context(); + return function_decl; + } + +void +gg_tack_on_function_parameters(tree function_decl, ...) + { + int nparams = 0; + + tree types[ARG_LIMIT]; + const char *names[ARG_LIMIT]; + + va_list params; + va_start(params, function_decl); + for(;;) + { + tree var_type = va_arg(params, tree); + if( !var_type ) + { + break; + } + + if( TREE_CODE(var_type) >= NUM_TREE_CODES) + { + // Warning: This test is not completely reliable, because a garbage + // byte could have a valid TREE_CODE. But it does help. + yywarn("You nitwit!"); + yywarn("You forgot to put a NULL_TREE at the end of a " + "gg_define_function() again!"); + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + gcc_unreachable(); + } + + const char *name = va_arg(params, const char *); + + types[nparams] = var_type; + names[nparams] = name; + nparams += 1; + if(nparams > ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### %d parameters? Really? Are you insane?",ARG_LIMIT+1); + gcc_unreachable(); + } + } + va_end(params); + + // Chain the names onto the variables list: + for(int i=0; i= NUM_TREE_CODES) + { + // Warning: This test is not completely reliable, because a garbage + // byte could have a valid TREE_CODE. But it does help. + yywarn("You nitwit!"); + yywarn("You forgot to put a NULL_TREE at the end of a " + "gg_define_function() again!"); + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + gcc_unreachable(); + } + + const char *name = va_arg(params, const char *); + + types[nparams] = var_type; + names[nparams] = name; + nparams += 1; + if(nparams > ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### %d parameters? Really? Are you insane?", + ARG_LIMIT+1); + gcc_unreachable(); + } + } + va_end(params); + + // Create the FUNCTION_TYPE for that array: + tree fndecl_type = build_varargs_function_type_array( return_type, + nparams, + types); + + // Create the FUNCTION_DECL for that FUNCTION_TYPE + tree function_decl = build_fn_decl (funcname, fndecl_type); + + // Some of this stuff is magical, and is based on compiling C programs + // and just mimicking the results. + TREE_ADDRESSABLE(function_decl) = 1; + TREE_STATIC(function_decl) = 1; + DECL_EXTERNAL (function_decl) = 0; + DECL_PRESERVE_P (function_decl) = 0; + DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT(function_decl) = 1; + DECL_ARTIFICIAL(function_decl) = 0; + TREE_NOTHROW(function_decl) = 0; + TREE_USED(function_decl) = 1; + + // This code makes COBOL nested programs actual visible on the + // source code "trans_unit_decl" level, but with non-public "static" + // visibility. + if( gg_trans_unit.function_stack.size() == 0 ) + { + // gg_trans_unit.function_stack is empty, so our context is + // the compilation module, and we need to be public: + DECL_CONTEXT (function_decl) = gg_trans_unit.trans_unit_decl; + TREE_PUBLIC(function_decl) = 1; + } + else + { + // The stack has something in it, so we are building a nested function. + // Make the current function our context + DECL_CONTEXT (function_decl) = gg_trans_unit.trans_unit_decl; + + // We need to make it public, because otherwise COBOL CALL "func" + // won't be able to find it, because dlopen/dlsym won't find it. + TREE_PUBLIC(function_decl) = 0; + + // Append this function to the list of functions and variables + // associated with the computation module. + gg_append_var_decl(function_decl); + } + + // Chain the names onto the variables list: + for(int i=0; i= NUM_TREE_CODES) + { + // Warning: This test is not completely reliable, because a garbage + // byte could have a valid TREE_CODE. But it does help. + yywarn("You nitwit!"); + yywarn("You forgot to put a NULL_TREE at the end of a " + "gg_define_function() again!"); + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + gcc_unreachable(); + } + + const char *name = va_arg(params, const char *); + + types[nparams] = var_type; + names[nparams] = name; + nparams += 1; + if(nparams > ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### %d parameters? Really? Are you insane?", + ARG_LIMIT+1); + gcc_unreachable(); + } + } + va_end(params); + + // Create the FUNCTION_TYPE for that array: + tree fndecl_type = build_varargs_function_type_array( return_type, + nparams, + types); + + // Create the FUNCTION_DECL for that FUNCTION_TYPE + tree function_decl = build_fn_decl (funcname, fndecl_type); + + // Some of this stuff is magical, and is based on compiling C programs + // and just mimicking the results. + TREE_ADDRESSABLE(function_decl) = 1; + TREE_STATIC(function_decl) = 1; + DECL_EXTERNAL (function_decl) = 0; + DECL_PRESERVE_P (function_decl) = 0; + DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT(function_decl) = 1; + DECL_ARTIFICIAL(function_decl) = 0; + TREE_NOTHROW(function_decl) = 0; + TREE_USED(function_decl) = 1; + + if( gg_trans_unit.function_stack.size() == 0 ) + { + // gg_trans_unit.function_stack is empty, so our context is + // the compilation module, and we need to be public: + DECL_CONTEXT (function_decl) = gg_trans_unit.trans_unit_decl; + TREE_PUBLIC(function_decl) = 1; + } + else + { + // The stack has something in it, so we are building a nested function. + // Make the current function our context + DECL_CONTEXT (function_decl) = current_function->function_decl; + TREE_PUBLIC(function_decl) = 0; + DECL_STATIC_CHAIN(function_decl) = 1; + } + + // Chain the names onto the variables list: + for(int i=0; i. + ///////// But, alas, I realized that it was just not going to work. + ///////// + ///////// Pity. + ///////// + ///////// But at that point, I was here, and I am leaving this uncooked + ///////// code in case I someday want to return to it. If it becomes + ///////// your job, rather than mine, I encourage you to write a C + ///////// program that uses the GNU extensions that allow true nested + ///////// functions, and reverse engineer the "finish_function" + ///////// function, and get it working. + ///////// + ///////// Good luck. Bob Dubner, 2022-08-13 + + // Because this is a nested function, let's make sure that it actually + // has a function that it is nested within + gcc_assert(gg_trans_unit.function_stack.size() > 1 ); + + /* Genericize before inlining. Delay genericizing nested functions + until their parent function is genericized. Since finalizing + requires GENERIC, delay that as well. */ + + // This is the comment in gcc/c/c-decl.c: + + /* Register this function with cgraph just far enough to get it + added to our parent's nested function list. Handy, since the + C front end doesn't have such a list. */ + + static cgraph_node *node = cgraph_node::get_create (current_function->function_decl); + gcc_assert(node); + + } + else + { + // This makes the function visible on the source code module level. + cgraph_node::finalize_function (current_function->function_decl, true); + } + + if( gg_trans_unit.function_stack.back().context_count ) + { + cbl_internal_error("Residual context count!"); + } + + gg_trans_unit.function_stack.pop_back(); + } + +void +gg_push_context() + { + // Sit back, relax, prepare to be amazed. + + // functions need a context in which they build variables and whatnot. + // they also need to be able to create subcontexts. + + // Functions have an DECL_INITIAL member that points to the first block. The + // first block has a BLOCK_VARS member that points to the first of a chain + // of var_decl entries. The first block has a BLOCK_SUBBLOCKS member that + // points to the block of the first subcontext. + + // Functions have a DECL_SAVED_TREE member that points to the first bind_expr + // That first bind_expr has a BIND_EXPR_BLOCK that points back to the first block + // has a BIND_EXPR_VARS that points back to the first block's first var_decl + // has a BIND_EXPR_BODY that points to the first statement_list + + // Each subsequent context gets a new block that is chained to the prior block through BLOCK_SUBBLOCKS + // Each subsequent context gets a new bind_expr which gets added to the parent context's statement list + + // Yes, it's confusing. Have a nice lie-down. + + // Here's what we need for this recipe: + + // We need a block: + tree block = make_node(BLOCK); + TREE_USED(block) = 1; + BLOCK_SUPERCONTEXT(block) = current_function->function_decl; + + // We need a statement list: + tree statement_list = alloc_stmt_list(); + + // We need a bind_expr: + tree bind_expr = build3(BIND_EXPR, + void_type_node, + NULL_TREE, // There are no vars yet. + statement_list, + block); + TREE_SIDE_EFFECTS(bind_expr) = 1; + + // At this point, we might be creating the initial context for a function, + // or we might be creating a sub-context. + + if( !DECL_INITIAL(current_function->function_decl) ) + { + // We are creating the initial context of the function: + DECL_INITIAL(current_function->function_decl) = block; + DECL_SAVED_TREE(current_function->function_decl) = bind_expr; + + // To avoid an N-squared time complexity when chaining blocks, we save the + // current end of the chain of blocks: + current_function->current_block = block; + } + else + { + // We are in the subtext business: + + // We need to tack on our new block to the end of the + // chain of existing blocks: + tree cblock = current_function->current_block; + BLOCK_SUBBLOCKS(cblock) = block; + current_function->current_block = block; + + // And we need to put our new bind_expr onto the end of the + // current active statement list: + gg_append_statement(bind_expr); + } + + // And now we make our statement_list and bind_expr the active ones: + current_function->statement_list_stack.push_back(statement_list); + current_function->bind_expr_stack.push_back(bind_expr); + + // And the new context is ready to rock and roll + gg_trans_unit.function_stack.back().context_count += 1; + } + +void +gg_pop_context() + { + // Backing out is much easier: + current_function->bind_expr_stack.pop_back(); + current_function->statement_list_stack.pop_back(); + + gg_trans_unit.function_stack.back().context_count -= 1; + } + +static +std::unordered_map fndecl_from_name; + +static +tree +function_decl_from_name(tree return_type, + const char *function_name, + int nargs, + tree arg_types[]) + { + tree fndecl; + std::unordered_map::const_iterator it = + fndecl_from_name.find(function_name); + if( it != fndecl_from_name.end() ) + { + fndecl = it->second; + } + else + { + tree fntype = build_function_type_array(return_type, nargs, arg_types); + fndecl = build_fn_decl (function_name, fntype); + fndecl_from_name[function_name] = fndecl; + } + return fndecl; + } + +tree +gg_call_expr(tree return_type, const char *function_name, ...) + { + // Generalized caller. Params are terminated with NULL_TREE + + // Use this routine to call function_name when you need the return value. + // Typically you will do something like + + // tree call_expr = gg_call_expr(...); + // gg_assign( dest, call_expr ); + + // Note that everyt time call_expr is laid down, the function will be called, + // so you probably don't want to do things like + // gg_assign( dest1, call_expr ); + // gg_assign( dest2, call_expr ); + + int nargs = 0; + static tree arg_types[ARG_LIMIT+1]; + static tree args[ARG_LIMIT+1]; + + va_list ap; + va_start(ap, function_name); + for(;;) + { + if(nargs >= ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### You *must* be joking!"); + gcc_unreachable(); + } + + tree arg = va_arg(ap, tree); + + if( !arg ) + { + break; + } + + arg_types[nargs] = TREE_TYPE(arg); + args[nargs] = arg; + nargs += 1; + } + arg_types[nargs] = NULL_TREE; + args[nargs] = NULL_TREE; + va_end (ap); + + tree function_decl = function_decl_from_name( return_type, + function_name, + nargs, + arg_types); + DECL_EXTERNAL (function_decl) = 1; + tree the_func_addr = build1(ADDR_EXPR, + build_pointer_type (TREE_TYPE(function_decl)), + function_decl); + tree the_call = build_call_array_loc(location_from_lineno(), + return_type, + the_func_addr, + nargs, + args); + // This routine returns the call_expr; the caller will have to deal with it + // as described up above + return the_call; + } + +void +gg_call(tree return_type, const char *function_name, ...) + { + // Generalized caller. function_name is followed by a NULL_TREE-terminated + // list of formal parameters. + + // Use this routine when you don't care about the return value, and + // you want the subroutine to be invoked. + + int nargs = 0; + static tree arg_types[ARG_LIMIT+1]; + static tree args[ARG_LIMIT+1]; + + va_list ap; + va_start(ap, function_name); + for(;;) + { + if(nargs >= ARG_LIMIT) + { + yywarn("###### %10s in %s:%d", __func__, __FILE__,__LINE__ ); + yywarn("###### You *must* be joking!"); + gcc_unreachable(); + } + + tree arg = va_arg(ap, tree); + + if( !arg ) + { + break; + } + + arg_types[nargs] = TREE_TYPE(arg); + args[nargs] = arg; + nargs += 1; + } + arg_types[nargs] = NULL_TREE; + args[nargs] = NULL_TREE; + va_end (ap); + + tree function_decl = function_decl_from_name( return_type, + function_name, + nargs, + arg_types); + DECL_EXTERNAL (function_decl) = 1; + tree the_func_addr = build1(ADDR_EXPR, + build_pointer_type (TREE_TYPE(function_decl)), + function_decl); + tree the_call = build_call_array_loc(location_from_lineno(), + return_type, + the_func_addr, + nargs, + args); + // This simply executes the_call; any return value is ignored + gg_append_statement(the_call); + } + +tree +gg_call_expr_list(tree return_type, tree function_name, int param_count, tree args[]) + { + // Generalized caller. param_count is the count of params in the arg[]] + + // Use this routine when you need the return value. Typically you + // will do something like + + // tree call_expr_Plist = gg_call_expr_list(...); + // gg_append_statement(call_expr); + + // Note that every time call_expr is invoked, the routine will run again. + + // Avoid that with something like + // gg_assign( dest, gg_call_expr_list(...) ); + + tree the_call = build_call_array_loc(location_from_lineno(), + return_type, + function_name, + param_count, + args); + // This routine returns the call_expr; the caller will have to deal with it + // as described up above + return the_call; + } + +tree +gg_create_bind_expr() + { + // In support of things like PERFORM paragraph, we need to create + // blocks of statements that can be executed. + + // This will be a naked bind_expr, like we use for WHILE construction. + // It's not defining a context, so it has no variable list, nor does + // it point to a block. + + tree statement_block = make_node(STATEMENT_LIST); + TREE_TYPE(statement_block) = void_type_node; + tree bind_expr = build3( BIND_EXPR, + void_type_node, + NULL_TREE, + statement_block, + NULL_TREE); + + return bind_expr; + } + +void +gg_exit(tree exit_code) + { + tree the_call = + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_EXIT), + 1, + exit_code); + gg_append_statement(the_call); + } + +void +gg_abort() + { + tree the_call = + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_ABORT), + 0); + gg_append_statement(the_call); + } + +tree +gg_strlen(tree psz) + { + tree the_call = fold_convert( + size_type_node, + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_STRLEN), + 1, + psz)); + return the_call; + } + +tree +gg_strdup(tree psz) + { + tree the_call = fold_convert( + build_pointer_type(char_type_node), + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_STRDUP), + 1, + psz)); + return the_call; + } + +/* built_in call to malloc() */ + +tree +gg_malloc(tree size) + { + tree the_call = fold_convert( + pvoid_type_node, + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_MALLOC), + 1, + size)); + return the_call; + } + +tree +gg_realloc(tree base, tree size) + { + tree the_call = fold_convert( + pvoid_type_node, + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_REALLOC), + 2, + base, + size)); + return the_call; + } + +tree +gg_realloc(tree base, size_t size) + { + return gg_realloc(base, build_int_cst_type(SIZE_T, size)); + } + +tree +gg_malloc(size_t size) + { + return gg_malloc(build_int_cst_type(SIZE_T, size)); + } + +void +gg_free(tree pointer) + { + tree the_call = + build_call_expr_loc(location_from_lineno(), + builtin_decl_explicit (BUILT_IN_FREE), + 1, + pointer); + gg_append_statement(the_call); + } + +void +gg_record_statement_list_start() + { + // We need a statement list: + tree statement_list = alloc_stmt_list(); + current_function->statement_list_stack.push_back(statement_list); + } + +tree +gg_record_statement_list_finish() + { + tree retval = current_function->statement_list_stack.back(); + current_function->statement_list_stack.pop_back(); + return retval; + } + +size_t +gg_sizeof(tree node) + { + size_t size_in_bytes; + if( DECL_P(node) ) + { + size_in_bytes = TREE_INT_CST_LOW(TYPE_SIZE_UNIT(TREE_TYPE(node))); + } + else + { + gcc_assert(TYPE_P(node)); + size_in_bytes = TREE_INT_CST_LOW(TYPE_SIZE_UNIT(node)); + } + return size_in_bytes; + } + +tree +gg_array_of_size_t( size_t N, size_t *values) + { + tree retval = gg_define_variable(build_pointer_type(SIZE_T)); + gg_assign(retval, gg_cast(build_pointer_type(SIZE_T), gg_malloc( build_int_cst_type(SIZE_T, N * sizeof(size_t))))); + for(size_t i=0; i nop_expr -> addr_expr -> string_cst + + My GENERIC: call_expr -> addr_expr -> array_ref -> string_cst + + I tried for an hour to duplicate the C stuff, but made no headway. + + This comment is a reminder to myself to investigate this, someday, because + I eventually want that ten percent. + */ + + return build_string_literal(strlen(string)+1, string); + } + +void +gg_set_current_line_number(int line_number) + { + sv_current_line_number = line_number; + } + +int +gg_get_current_line_number() + { + return sv_current_line_number; + } + +tree +gg_trans_unit_var_decl(const char *var_name) + { + std::unordered_map::const_iterator it = + gg_trans_unit.trans_unit_var_decls.find(var_name); + if( it != gg_trans_unit.trans_unit_var_decls.end() ) + { + return it->second; + } + return NULL_TREE; + } + +void +gg_insert_into_assembler(const char *format, ...) + { + // This routine inserts text directly into the assembly language stream. + + // Note that if for some reason your text has to have a '%' character, it + // needs to be doubled in the GENERIC tag. And that means if it is in the + // 'format' variable, it needs to be quadrupled. + + // Create the string to be inserted: + char ach[256]; + va_list ap; + va_start(ap, format); + vsnprintf(ach, sizeof(ach), format, ap); + va_end(ap); + + // Create the required generic tag + tree asm_expr = build5_loc( location_from_lineno(), + ASM_EXPR, + VOID, + build_string(strlen(ach), ach), + NULL_TREE, + NULL_TREE, + NULL_TREE, + NULL_TREE); + //SET_EXPR_LOCATION (asm_expr, UNKNOWN_LOCATION); + + // And insert it as a statement + gg_append_statement(asm_expr); + } diff --git a/gcc/cobol/gengen.h b/gcc/cobol/gengen.h new file mode 100644 index 00000000000..8c1bc8daef6 --- /dev/null +++ b/gcc/cobol/gengen.h @@ -0,0 +1,544 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __GENGEN_H +#define __GENGEN_H + +// Note how the definitions of IF and WHILE lets you use them as +// IF(a,b,c) and WHILE(a,b,c) with no semicolon. +// And, yes, I see that ELSE, ENDIF, and WEND are all the same. Sometimes +// looks *are* important, and the multiple definitions make things easier +// to understand. + +#define IF(a,b,c) gg_if((a),(b),(c)); +#define ELSE current_function->statement_list_stack.pop_back(); +#define ENDIF current_function->statement_list_stack.pop_back(); +#define WHILE(a,b,c) gg_while((a),(b),(c)); +#define WEND current_function->statement_list_stack.pop_back(); + +// mnemonics for variable types: + +#define VOID void_type_node +#define BOOL boolean_type_node +#define CHAR char_type_node +#define SCHAR signed_char_type_node +#define UCHAR unsigned_char_type_node +#define SHORT short_integer_type_node +#define USHORT short_unsigned_type_node +#define WCHAR short_unsigned_type_node +#define INT integer_type_node +#define INT_P build_pointer_type(integer_type_node) +#define UINT unsigned_type_node +#define LONG long_integer_type_node +#define ULONG long_unsigned_type_node +#define LONGLONG long_long_integer_type_node +#define ULONGLONG long_long_unsigned_type_node +#define SIZE_T size_type_node +#define SIZE_T_P (build_pointer_type(SIZE_T)) +#define SSIZE_T ptrdiff_type_node +#define INT128 intTI_type_node +#define UINT128 unsigned_intTI_type_node +#define FLOAT float32_type_node +#define DOUBLE float64_type_node +#define LONGDOUBLE long_double_type_node +#define FLOAT128 float128_type_node +#define VOID_P ptr_type_node +#define VOID_P_P (build_pointer_type(VOID_P)) +#define CHAR_P char_ptr_type_node +#define UCHAR_P uchar_ptr_type_node +#define WCHAR_P wchar_ptr_type_node +#define FILE_P fileptr_type_node + +#define SIZE128 (16) // In bytes + +/* Explanatory note for vs_file_static variables + + In a C program, you can have this variable declaration outside of a + function: + + static const int intvar = 12321; + + It will be visible to any function that follows. After several days of + experimentation and research, I found I was unable to duplicate this + behavior in the GCOBOL code generator. I simply wasn't able to reverse + engineer whatever magical incantations are necessary to declare and define] + variables on the translation unit level rather than on the function level. + + Having reached the point where the structural integrity of my desk was being + threatened by the repeated percussive strikes from my forehead, I turned my + attention to an equivalent workaround. + + On the assembly language level, there is no fundamental way of making a + variable visible to only a specific function. So, to distinguish between + two non-global variables named "fred" in two different functions, the C + compiler appends a dot and a number, with the "number" being different for + the two functions. + + The GCOBOL compiler has been doing just that. So, to implement a + vs_file_static variable, I treat it just like a vs_static variable, but + without appending a differentiator. + + */ + +enum gg_variable_scope_t { + vs_stack, + vs_static, + vs_file_static, // static variable of file scope + vs_external, // Creates a PUBLIC STATIC variable of file scope + vs_external_reference, // References the previous + vs_file, // variable of file scope, without static +}; + +struct gg_function_t + { + // Nomenclature Alert: The "function" in gg_function_t was chosen + // originally because a PROGRAM-ID is implemented as a C-style "function", + // and there are numerous tree variables that refer to "functions". + // Eventually the COBOL compiler grew to handle not just COBOL PROGRAM-ID + // "programs", but also user-defined COBOL FUNCTION-ID "functions". This + // inevitably is confusing. Sorry about that. + + // This structure contains state variables for a single function. + + const char *our_unmangled_name; // This is the original name + const char *our_name; // This is our mangled name + tree function_address; + size_t our_symbol_table_index; + + // The function_decl is fundamental to many, many things + tree function_decl; + + // We keep track of the end of the chain of blocks: + tree current_block; + + // Every function has a context, wherein temporary variables get created + // and whose names won't collide with the names in other function. + + // But it is often necessary to create subcontexts, which inherit names from + // its parent function, but can reuse names, and create new ones, without + // collisions. Each context gets its own bind_expr, each bind_expr points + // to its own block. So, to create subcontexts, we need to know which + // bind_expr we add variable declarations to. + std::vector bind_expr_stack; + + // Every function has a statement list. But there can be statements + // that consist of statement lists. This happens when building IF + // statements (TRUE gets its own list, as does FALSE) and WHILE statements + // (where the execution block is a statement list. This stack enables that + // to happen cleanly, so the programmer doesn't have to be concerned about + // which list is being built. + + // Note that the gg_statement_list_stack can grow larger than the + // current_function->bind_expr_stack stack, because + // there are times -- like inside of WHILE() and IF constructs -- where we + // push onto the statement_list_stack and even create new bind_expr nodes, + // but don't need a full new context. But every new context gets a new + // statement list, and when + // current_function->bind_expr_stack is popped, + // statement_list_stack is popped, too. + std::vector statement_list_stack; + + // COBOL sections and paragraphs are handled identically; it's the context + // that makes them different: PROGRAMS contain SECTIONS, and SECTIONS + // contain paragraphs. I call both SECTIONS and PARAGRAPHS "procs" + + // At any given moment, there is one "current section" and one "current + // paragraph". + struct cbl_proc_t *current_section; + struct cbl_proc_t *current_paragraph; + + tree void_star_temp; // At the end of every paragraph and section, we + // // we need a variable "void *temp" to hold a + // // label for one instruction. Rather than clutter + // // up the code with temporaries, we use this one + // // instance instead. + + tree first_time_through; + + tree skip_init_goto; + tree skip_init_label; + + // We use context_count to detect a mismatch between gg_push_context() and + // gg_pop_context calls, which should be equal at the point gimplify is + // invoked: + int context_count; + + // When a function is called, it comes with zero to N parameters on the + // stack. We treat it as variadic; see parser_division(PROCEDURE) to see + // how we pick up the N values on the stack: + tree formal_parameters; + + // When parser_division(PROCEDURE) is called, it provides a cbl_field_t + // *returning parameter. We stash it here; it's used during parser_exit() + // to provide the data for the program's return value. + cbl_field_t *returning; // This one is on the stack, like a LOCAL-STORAGE + + size_t program_id_number; // Used to give static variables + // // a unique . suffix + + // There are two types of nesting. COBOL nesting is implemented in a + // logical way: All programs are siblings, with the context being the source + // code module. The nested aspect is not reflected in the GENERIC tree. + + // Truly nested functions are implemented within the generic tree; the + // nested function is completely inside the outer function. This was + // implemented to support paragraphs as callable entities. + bool is_truly_nested; + + // This variable, which appears on the stack, contains the exit_address + // for the terminating proc of a PERFORM A or PERFORM A THROUGH B + tree perform_exit_address; + + // This variable is a pointer to the first declarative section of this + // program-id/function. It's used in when creating the linked list of + // declaratives, because the last declarative of a nested function links + // back to the first declarative of its immediate parent. + tree first_declarative_section; + + // is_function is true when this structure is describing a COBOL FUNCTION-ID + // and is false for a PROGRAM-ID + bool is_function; + + // This integer is initially set to one when this function is called by + // our generated main(). It gets incremented by 1 when the routine is + // re-entered: main() -> us -> B -> us + // When processing EXIT PROGRAM, if the counter is greater then 1, it is + // decremented and a return is created. When the counter is 1, the + // EXIT program is treated as a CONTINUE. + tree called_by_main_counter; + }; + +struct cbl_translation_unit_t + { + // GCC calls a source file a "translation unit". This structure contains + // all of the information needed by and for a translation unit. There + // probably should be one, and only one, of these instantiated by the COBOL + // front end. + + // Every function in this code module gets this translation_unit_decl + // as its context. This node is built in parse_enter_file() + tree trans_unit_decl; + + // This is the filename of this trans_unit_decl + const char *filename; + + // This is the stack of function_decls in this translation unit; each + // call to parser_enter_program() pushes onto this stack; each call to + // parser_end_program() pops it. + std::vector function_stack; + + // This is where we keep var_decls because of my inability to figure out how + // to tell the compiler to create data definitions for translation_unit_decl + // variables: + std::unordered_map trans_unit_var_decls; + }; + +extern struct cbl_translation_unit_t gg_trans_unit; + +#define current_function (&gg_trans_unit.function_stack.back()) + +extern GTY(()) tree char_nodes[256] ; +extern GTY(()) tree pvoid_type_node ; +extern GTY(()) tree integer_minusone_node; +extern GTY(()) tree integer_two_node ; +extern GTY(()) tree integer_eight_node ; +extern GTY(()) tree size_t_zero_node ; +extern GTY(()) tree int128_zero_node ; +extern GTY(()) tree int128_five_node ; +extern GTY(()) tree int128_ten_node ; +extern GTY(()) tree bool_true_node ; +extern GTY(()) tree bool_false_node ; +extern GTY(()) tree char_ptr_type_node ; +extern GTY(()) tree uchar_ptr_type_node ; +extern GTY(()) tree wchar_ptr_type_node ; +extern GTY(()) tree long_double_ten_node ; +extern GTY(()) tree sizeof_size_t ; +extern GTY(()) tree sizeof_pointer ; + +// These routines happen when beginning to process a new file, which is also +// known, in GCC, as a "translation unit" +extern void gg_build_translation_unit(const char *filename); + +// For an expression type to actually be implemented in the target +// runtime binary, it has to find its way onto a statement list. (Or be used +// as the second operand of a modify_expr, and so on.) +extern void gg_append_statement(tree stmt); +//// extern void gg_insert_statement(struct tree_stmt_iterator *tsi, tree stmt); + +// For variables: +extern void gg_append_var_decl(tree var); + +// type cast +extern tree gg_float(tree float_type, tree integer_var); +extern tree gg_trunc(tree integer_type, tree float_var); +extern tree gg_cast(tree type, tree var); + +// Assignment, that is to say, A = B +extern void gg_assign(tree dest, const tree source); + +// struct creation and field access +// Create struct, and access a field in a struct +extern tree gg_get_local_struct_type_decl(const char *type_name, int count, ...); +extern tree gg_get_filelevel_struct_type_decl(const char *type_name, int count, ...); +extern tree gg_get_filelevel_union_type_decl(const char *type_name, int count, ...); +extern tree gg_define_local_struct(const char *type_name, const char * var_name, int count ,...); +extern tree gg_find_field_in_struct(const tree var_decl, const char *field_name); +extern tree gg_struct_field_ref(const tree struct_decl, const char *field); +extern tree gg_assign_to_structure(tree var_decl_struct, const char *field, const tree source); +extern tree gg_assign_to_structure(tree var_decl_struct, const char *field, int N); + +// Generalized variable declareres. This don't create storage +extern tree gg_declare_variable(tree type_decl, + const char *name=NULL, + tree initial_value=NULL_TREE, + gg_variable_scope_t vs_scope=vs_stack, + bool *already_defined = NULL); +extern tree gg_define_from_declaration(tree var_decl); + +// Generalized variable definers. These create storage +extern tree gg_define_variable(tree type_decl); +extern tree gg_define_variable(tree type_decl, tree initial_value); +extern tree gg_define_variable(tree type_decl, gg_variable_scope_t vs_scope); +extern tree gg_define_variable(tree type_decl, + const char *name, + gg_variable_scope_t vs_scope=vs_stack); +extern tree gg_define_variable(tree type_decl, + const char *name, + gg_variable_scope_t vs_scope, + tree initial_value); +// Utility definers: +extern tree gg_define_bool(); +extern tree gg_define_char(); +extern tree gg_define_char(const char *variable_name); +extern tree gg_define_char(const char *variable_name, tree ch); +extern tree gg_define_char(const char *variable_name, int ch); + +extern tree gg_define_uchar(); +extern tree gg_define_uchar(const char *variable_name); +extern tree gg_define_uchar(const char *variable_name, tree ch); +extern tree gg_define_uchar(const char *variable_name, int ch); + +extern tree gg_define_int(); +extern tree gg_define_int(int N); +extern tree gg_define_int(const char *variable_name); +extern tree gg_define_int(const char *variable_name, tree N); +extern tree gg_define_int(const char *variable_name, int N); + +extern tree gg_define_size_t(); +extern tree gg_define_size_t(const char *variable_name); +extern tree gg_define_size_t(const char *variable_name, tree N); +extern tree gg_define_size_t(const char *variable_name, size_t N); +extern tree gg_define_size_t(tree N); +extern tree gg_define_size_t(size_t N); + +extern tree gg_define_int128(); +extern tree gg_define_int128(const char *variable_name); +extern tree gg_define_int128(const char *variable_name, tree N); +extern tree gg_define_int128(const char *variable_name, int N); + +extern tree gg_define_longdouble(); + +extern tree gg_define_void_star(); +extern tree gg_define_void_star(tree var); +extern tree gg_define_void_star(const char *variable_name); +extern tree gg_define_void_star(const char *variable_name, tree var); +extern tree gg_define_void_star(const char *variable_name, gg_variable_scope_t scope); + +extern tree gg_define_char_star(); +extern tree gg_define_char_star(tree var); +extern tree gg_define_char_star(const char *variable_name); +extern tree gg_define_char_star(const char *variable_name, tree var); +extern tree gg_define_char_star(const char *variable_name, gg_variable_scope_t scope); + +extern tree gg_define_uchar_star(); +extern tree gg_define_uchar_star(const char *variable_name); +extern tree gg_define_uchar_star(const char *variable_name, gg_variable_scope_t scope); +extern tree gg_define_uchar_star(tree var); +extern tree gg_define_uchar_star(const char *variable_name, tree var); + +// address_of operator; equivalent of C "&buffer" +extern tree gg_get_address_of(const tree var_decl); + +// Array creation and access: +extern tree gg_define_array(tree type_decl, size_t size); +extern tree gg_define_array(tree type_decl, const char *name, size_t size); +extern tree gg_define_array(tree type_decl, size_t size, gg_variable_scope_t scope); +extern tree gg_define_array(tree type_decl, const char *name, size_t size, gg_variable_scope_t scope); + +extern tree gg_array_value(tree pointer, tree offset=NULL_TREE); +extern tree gg_array_value(tree pointer, int N); + +// Here are some unary operations +extern void gg_increment(tree var); +extern void gg_decrement(tree var); +extern tree gg_negate(tree var); // Two's complement negation +extern tree gg_bitwise_not(tree var); // Bitwise inversion +extern tree gg_abs(tree var); // Absolute value + +// And some binary operations: + +extern tree gg_add(tree addend1, tree addend2); +extern tree gg_subtract(tree A, tree B); +extern tree gg_multiply(tree A, tree B); +extern tree gg_real_divide(tree A, tree B); // Floating point division +extern tree gg_divide(tree A, tree B); // Integer division +extern tree gg_mod(tree A, tree B); +extern tree gg_lshift(tree A, tree B); +extern tree gg_rshift(tree A, tree B); +extern tree gg_bitwise_or(tree A, tree B); +extern tree gg_bitwise_xor(tree A, tree B); +extern tree gg_bitwise_and(tree A, tree B); + +// Conditionals: Use the IF() and WHILE() macros, which generated +// code that calls these functions. Calling them yourself is +// probably a bad idea because there are stacks that have to be +// kept in the right states. + +extern tree gg_build_relational_expression( tree operand_a, + enum relop_t op, + tree operand_b); +extern tree gg_build_logical_expression(tree operand_a, + enum logop_t op, + tree operand_b); + +extern void gg_create_true_false_statement_lists(tree relational_expression); +extern void gg_while(tree operand_a, enum relop_t op, tree operand_b); +extern void gg_if( tree operand_a, enum relop_t op, tree operand_b); + +// Are are some system functions that can be useful. gg_printf is +// particularly useful for generating run-time messages. Actual run-time +// code is built using write(), because it allows for file descriptors and +// doesn't require null-terminated strings. + +extern tree gg_get_function_address(tree return_type, const char *funcname); +extern void gg_printf(const char *format_string, ...); +extern tree gg_fprintf(tree fd, int nargs, const char *format_string, ...); +extern tree gg_read(tree fd, tree buf, tree count); +extern void gg_write(tree fd, tree buf, tree count); +extern void gg_memset(tree dest, const tree value, tree size); +extern tree gg_memchr(tree s, tree c, tree n); +extern void gg_memcpy(tree dest, const tree src, tree size); +extern void gg_memmove(tree dest, const tree src, tree size); +extern tree gg_memdup(tree data, tree length); +extern tree gg_memdup(tree data, size_t length); +extern void gg_strcpy(tree char_star_A, tree char_star_B); +extern tree gg_strdup(tree char_star_A); +extern tree gg_strcmp(tree char_star_A, tree char_star_B); +extern tree gg_strncmp(tree char_star_A, tree char_star_B, tree size_t_N); + +// Flow control inside a function +extern void gg_return(tree operand = NULL_TREE); + +// These routines are the preample and postamble that bracket everything else +extern void gg_define_function(tree return_type, const char *funcname, ...); +extern tree gg_define_function_with_no_parameters(tree return_type, + const char *funcname, + const char *unmangled_name); +extern void chain_parameter_to_function( tree function_decl, + const tree param_type, + const char *name); + +extern void gg_finalize_function(); +extern void gg_push_context(); +extern void gg_pop_context(); + +// These are a generalized call constructor. The first for when you just want +// the function called, because you don't care about the return value. The others +// are for when you do need the return value. +extern tree gg_call_expr_list(tree return_type, tree function_name, int param_count, tree[]); + +// The following is a garden-variety call, with known return type and known +// but in the case where the return value is unimportant. +extern void gg_call (tree return_type, const char *function_name, ...); +extern tree gg_call_expr(tree return_type, const char *function_name, ...); + +// Returns a simple entangled goto/comefrom pair. Used for things like +// IF/ELSE/ENDIF and WHILE/WEND +void gg_create_goto_pair(tree *goto_expr, tree *label_expr); +void gg_create_goto_pair(tree *goto_expr, tree *label_expr, const char *name); + +// This more complex version is used for implementing SECTIONS and PARAGRAPHS. +void gg_create_goto_pair( tree *goto_expr, + tree *label_expr, + tree *label_addr, + const char *name); +void gg_create_goto_pair( tree *goto_expr, + tree *label_expr, + tree *label_addr); +void gg_create_goto_pair( tree *goto_expr, + tree *label_expr, + tree *label_addr, + tree *label_decl); +void gg_goto_label_decl(tree label_decl); + +// Used for implementing SECTIONS and PARAGRAPHS. When you have a +// void *pointer = &&label, gg_goto is the same as +// goto *pointer +void gg_goto(tree pointer); + +void gg_record_statement_list_start(); +tree gg_record_statement_list_finish(); + +// These routines are in support of PERFORM PARAGRAPH +extern tree gg_get_function_decl(tree return_type, const char *funcname, ...); + +// Used to call system exit() +extern void gg_exit(tree exit_code); +extern void gg_abort(); + +extern tree gg_malloc(tree length); +extern tree gg_malloc(size_t length); +extern tree gg_realloc(tree base, tree length); +extern tree gg_realloc(tree base, size_t length); +extern void gg_free(tree pointer); +extern tree gg_strlen(tree psz); +extern size_t gg_sizeof(tree decl_node); + +extern tree gg_array_of_field_pointers( size_t N, + cbl_field_t **fields ); +extern tree gg_array_of_size_t( size_t N, size_t *values); +extern tree gg_array_of_bytes( size_t N, unsigned char *values); +extern tree gg_indirect(tree pointer, tree byte_offset = NULL_TREE); +extern tree gg_string_literal(const char *string); + +#define CURRENT_LINE_NUMBER (cobol_location().first_line) +location_t location_from_lineno(); + +// When set to true, use UNKNOWN_LOCATION instead of CURRENT_LINE_NUMBER +extern void gg_set_current_line_number(int line_number); +extern int gg_get_current_line_number(); + +extern tree gg_trans_unit_var_decl(const char *var_name); + +tree gg_open(tree char_star_A, tree int_B); +tree gg_close(tree int_A); +tree gg_get_indirect_reference(tree pointer, tree offset); +void gg_insert_into_assembler(const char *format, ...); +void gg_modify_function_type(tree function_decl, tree return_type); +#endif diff --git a/gcc/cobol/genmath.cc b/gcc/cobol/genmath.cc new file mode 100644 index 00000000000..138551b1f6c --- /dev/null +++ b/gcc/cobol/genmath.cc @@ -0,0 +1,1730 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "cobol-system.h" +#include "coretypes.h" +#include "tree.h" +#define HOWEVER_GCC_DEFINES_TREE 1 +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" +#include "genutil.h" +#include "gengen.h" +#include "structs.h" +#include "gcobolio.h" +#include "libgcobol.h" +#include "show_parse.h" + +void +set_up_on_exception_label(cbl_label_t *arithmetic_label) + { + if( arithmetic_label ) + { + if( !arithmetic_label->structs.arith_error ) + { + arithmetic_label->structs.arith_error + = (cbl_arith_error_t *)xmalloc(sizeof(struct cbl_arith_error_t) ); + // Set up the address pairs for this clause + gg_create_goto_pair(&arithmetic_label->structs.arith_error->over.go_to, + &arithmetic_label->structs.arith_error->over.label); + gg_create_goto_pair(&arithmetic_label->structs.arith_error->into.go_to, + &arithmetic_label->structs.arith_error->into.label); + gg_create_goto_pair(&arithmetic_label->structs.arith_error->bottom.go_to, + &arithmetic_label->structs.arith_error->bottom.label); + } + } + } + +void +set_up_compute_error_label(cbl_label_t *compute_label) + { + if( compute_label ) + { + if( !compute_label->structs.compute_error ) + { + compute_label->structs.compute_error + = (cbl_compute_error_t *) + xmalloc(sizeof(struct cbl_compute_error_t) ); + compute_label->structs.compute_error->compute_error_code + = gg_define_int(0); + } + } + } + +static void +set_up_arithmetic_error_handler(cbl_label_t *error, + cbl_label_t *not_error) + { + Analyze(); + // There might, or might not, be error and/or not_error labels: + set_up_on_exception_label(error); + set_up_on_exception_label(not_error); + } + +static void +arithmetic_operation(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_arith_format_t format, + cbl_label_t *error, + cbl_label_t *not_error, + tree compute_error, // Pointer to int + const char *operation, + cbl_refer_t *remainder = NULL) + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_TEXT_AB("performing ", operation, "") + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_TEXT_ABC("calling ", operation, "") + TRACE1_END + for(size_t ii=0; iifield->type; + temp_field.attr = (remainder->field->attr | intermediate_e) & ~initialized_e; + temp_field.level = 1; + temp_field.data.memsize = remainder->field->data.memsize ; + temp_field.data.capacity = remainder->field->data.capacity; + temp_field.data.digits = remainder->field->data.digits ; + temp_field.data.rdigits = remainder->field->data.rdigits ; + temp_field.data.initial = remainder->field->data.initial ; + temp_field.data.picture = remainder->field->data.picture ; + parser_symbol_add(&temp_field); + temp_remainder.field = &temp_field; + + // For division, the optional remainder goes onto the beginning of the + // list + results[ncount++] = temp_remainder; + } + for(size_t i=0; istructs.arith_error->into.go_to ); + } + ELSE + ENDIF + } + + if( not_error ) + { + IF( gg_indirect(compute_error), eq_op, integer_zero_node) + { + // There wasn't a computation error + gg_append_statement( not_error->structs.arith_error->into.go_to ); + } + ELSE + ENDIF + } + + // With the operation and the two possible GO TOs laid down, it's time + // to create the target labels for exiting the ON [NOT] SIZE ERROR blocks: + if( error ) + { + gg_append_statement( error->structs.arith_error->bottom.label ); + } + if( not_error ) + { + gg_append_statement( not_error->structs.arith_error->bottom.label ); + } + } + +static bool +is_somebody_float(size_t nA, cbl_refer_t *A) + { + bool retval = false; + for(size_t i=0; itype == FldFloat) + { + retval = true; + break; + } + } + return retval; + } + +static bool +is_somebody_float(size_t nC, cbl_num_result_t *C) + { + bool retval = false; + for(size_t i=0; itype == FldFloat) + { + retval = true; + break; + } + } + return retval; + } + +static bool +all_results_binary(size_t nC, cbl_num_result_t *C) + { + bool retval = true; + + for(size_t i=0; idata.digits != 0 || C[i].refer.field->type == FldFloat ) + { + retval = false; + break; + } + } + return retval; + } + +static tree +largest_binary_term(size_t nA, cbl_refer_t *A) + { + tree retval = NULL_TREE; + uint32_t max_capacity = 0; + int is_negative = 0; + + for(size_t i=0; idata.rdigits || A[i].field->type == FldFloat ) + { + // We are prepared to work only with integers + retval = NULL_TREE; + break; + } + if( A[i].field->type == FldLiteralN +// || A[i].field->type == FldNumericDisplay + || A[i].field->type == FldNumericBinary + || A[i].field->type == FldNumericBin5 + || A[i].field->type == FldIndex + || A[i].field->type == FldPointer ) + { + // This is an integer type that can be worked with quickly + is_negative |= ( A[i].field->attr & signable_e ); + max_capacity = std::max(max_capacity, A[i].field->data.capacity); + retval = tree_type_from_size(max_capacity, is_negative); + } + else + { + // This is a type we don't care to deal with for fast arithmetic + retval = NULL_TREE; + break; + } + } + return retval; + } + +static bool +fast_add( size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + cbl_arith_format_t format ) + { + bool retval = false; + if( all_results_binary(nC, C) ) + { + Analyze(); + // All targets are non-PICTURE binaries: + //gg_insert_into_assembler("# DUBNER addition START"); + tree term_type = largest_binary_term(nA, A); + if( term_type ) + { + // All the terms are things we can work with. + + // We need to calculate the sum of all the A[] terms using term_type as + // the intermediate type: + + tree sum = gg_define_variable(term_type); + tree addend = gg_define_variable(term_type); + get_binary_value( sum, + NULL, + A[0].field, + refer_offset_source(A[0])); + + // Add in the rest of them: + for(size_t i=1; idata.capacity, 0); + tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"), + refer_offset_dest(C[i].refer)); + tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr); + if( format == giving_e ) + { + // We are assigning + gg_assign( gg_indirect(ptr), + gg_cast(dest_type, sum)); + } + else + { + // We are accumulating + gg_assign( gg_indirect(ptr), + gg_add( gg_indirect(ptr), + gg_cast(dest_type, sum))); + } + } + retval = true; + } + + //gg_insert_into_assembler("# DUBNER addition END "); + } + return retval; + } + +static bool +fast_subtract(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_arith_format_t format) + { + bool retval = false; + if( all_results_binary(nC, C) ) + { + Analyze(); + // All targets are non-PICTURE binaries: + //gg_insert_into_assembler("# DUBNER addition START"); + tree term_type = largest_binary_term(nA, A); + + if( term_type && format == giving_e ) + { + tree term_type_B = largest_binary_term(nB, B); + if( term_type_B ) + { + if(TREE_INT_CST_LOW(TYPE_SIZE(term_type_B)) + > TREE_INT_CST_LOW(TYPE_SIZE(term_type)) ) + { + term_type = term_type_B; + } + } + else + { + term_type = NULL_TREE; + } + } + + if( term_type ) + { + // All the terms are things we can work with. + + // We need to calculate the sum of all the A[] terms using term_type as + // the intermediate type: + + tree sum = gg_define_variable(term_type); + tree addend = gg_define_variable(term_type); + get_binary_value(sum, NULL, A[0].field, refer_offset_dest(A[0])); + + // Add in the rest of them: + for(size_t i=1; idata.capacity, 0); + tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"), + refer_offset_dest(C[i].refer)); + tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr); + if( format == giving_e ) + { + // We are assigning + gg_assign( gg_indirect(ptr), + gg_cast(dest_type, sum)); + } + else + { + // We are subtracting the sum from C[i] + gg_assign( gg_indirect(ptr), + gg_subtract(gg_indirect(ptr), + gg_cast(dest_type, sum))); + } + } + retval = true; + } + } + return retval; + } + +static bool +fast_multiply(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B) + { + bool retval = false; + if( all_results_binary(nC, C) ) + { + Analyze(); + // All targets are non-PICTURE binaries: + //gg_insert_into_assembler("# DUBNER addition START"); + tree term_type = largest_binary_term(nA, A); + + if( term_type && nB ) + { + tree term_type_B = largest_binary_term(nB, B); + if( term_type_B ) + { + if(TREE_INT_CST_LOW(TYPE_SIZE(term_type_B)) + > TREE_INT_CST_LOW(TYPE_SIZE(term_type)) ) + { + term_type = term_type_B; + } + } + else + { + term_type = NULL_TREE; + } + } + + if( term_type ) + { + // All the terms are things we can work with. + + tree valA = gg_define_variable(term_type); + tree valB = gg_define_variable(term_type); + get_binary_value(valA, NULL, A[0].field, refer_offset_dest(A[0])); + + if( nB ) + { + // This is a MULTIPLY Format 2 + get_binary_value(valB, NULL, B[0].field, refer_offset_dest(B[0])); + } + + if(nB) + { + gg_assign(valA, gg_multiply(valA, valB)); + } + + // We now either multiply into C[n] or assign A * B to C[n]: + for(size_t i=0; idata.capacity, 0); + tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"), + refer_offset_dest(C[i].refer)); + tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr); + if( nB ) + { + // We put A * B into C + gg_assign(gg_indirect(ptr), gg_cast(dest_type, valA)); + } + else + { + // We multiply C = valA * C + gg_assign(gg_indirect(ptr), + gg_multiply(gg_indirect(ptr), valA)); + } + } + retval = true; + } + + //gg_insert_into_assembler("# DUBNER addition END "); + } + return retval; + } + +static bool +fast_divide(size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_refer_t remainder) + { + bool retval = false; + if( all_results_binary(nC, C) ) + { + Analyze(); + // All targets are non-PICTURE binaries: + //gg_insert_into_assembler("# DUBNER addition START"); + tree term_type = largest_binary_term(nA, A); + + if( term_type && nB ) + { + tree term_type_B = largest_binary_term(nB, B); + if( term_type_B ) + { + if(TREE_INT_CST_LOW(TYPE_SIZE(term_type_B)) + > TREE_INT_CST_LOW(TYPE_SIZE(term_type)) ) + { + term_type = term_type_B; + } + } + else + { + term_type = NULL_TREE; + } + } + + if( term_type ) + { + // All the terms are things we can work with. + + tree divisor = gg_define_variable(term_type); + tree dividend = gg_define_variable(term_type); + tree quotient = NULL_TREE; + get_binary_value(divisor, NULL, A[0].field, refer_offset_dest(A[0])); + + if( nB ) + { + // This is a MULTIPLY Format 2, where we are dividing A into B and + // assigning that to C + get_binary_value(dividend, NULL, B[0].field, refer_offset_dest(B[0])); + + quotient = gg_define_variable(term_type); + // Yes, in this case the divisor and dividend are switched. Things are + // tough all over. + gg_assign(quotient, gg_divide(divisor, dividend)); + } + + // We now either divide into C[n] or assign dividend/divisor to C[n]: + for(size_t i=0; idata.capacity, 0); + tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"), + refer_offset_dest(C[i].refer)); + tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr); + if( nB ) + { + // We put A * B into C + gg_assign(gg_indirect(ptr), gg_cast(dest_type, quotient)); + } + else + { + // We divide the divisor into C + gg_assign(gg_indirect(ptr), + gg_divide(gg_indirect(ptr), divisor)); + } + + // This is where we handle any remainder, keeping in mind that for + // nB != 0, the actual dividend is in the value we have named "divisor". + // + // And, yes, I hate comments like that, too. + + // We calculate the remainder by calculating + // dividend minus quotient * divisor + if( remainder.field ) + { + tree dest_addr = gg_add(member(remainder.field->var_decl_node, "data"), + refer_offset_dest(remainder)); + dest_type = tree_type_from_size(remainder.field->data.capacity, 0); + ptr = gg_cast(build_pointer_type(dest_type), dest_addr); + + gg_assign(gg_indirect(ptr), + gg_cast(dest_type, gg_subtract(divisor, + gg_multiply(quotient, dividend)))); + } + } + retval = true; + } + + //gg_insert_into_assembler("# DUBNER addition END "); + } + return retval; + } + +void +parser_add( size_t nC, cbl_num_result_t *C, + size_t nA, cbl_refer_t *A, + cbl_arith_format_t format, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error_p ) // Cast this to a tree / int * + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + fprintf(stderr, " A[%ld]:", nA); + for(size_t i=0; i 0) + { + fprintf(stderr, ","); + } + fprintf(stderr, "%s", A[i].field->name); + } + + fprintf(stderr, "%s", format==giving_e? " GIVING" : ""); + + fprintf(stderr, " C[%ld]:", nC); + for(size_t i=0; i 0) + { + fprintf(stderr, ","); + } + fprintf(stderr, "%s", C[i].refer.field->name); + } + + SHOW_PARSE_END + } + + TRACE1 + { + TRACE1_HEADER + TRACE1_END + } + + tree compute_error = (tree)compute_error_p; + if( compute_error == NULL ) + { + gg_assign(var_decl_default_compute_error, integer_zero_node); + compute_error = gg_get_address_of(var_decl_default_compute_error); + } + bool handled = false; + + if( fast_add( nC, C, + nA, A, + format) ) + { + handled = true; + } + else + { + bool computation_is_float = is_somebody_float(nA, A) + || is_somebody_float(nC, C); + // We now start deciding which arithmetic routine we are going to use: + if( computation_is_float ) + { + switch( format ) + { + case no_giving_e: + { + // Float format 1 + + set_up_arithmetic_error_handler(error, + not_error); + // Do phase 1, which calculates the subtotal and puts it into a + // temporary location + arithmetic_operation( 0, NULL, + nA, A, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__add_float_phase1"); + + // Do phase 2, which accumulates the subtotal into each target location in turn + for(size_t i=0; i + structs.compute_error-> + compute_error_code) + : gg_get_address_of(var_decl_default_compute_error) ; + SHOW_PARSE + { + SHOW_PARSE_HEADER + SHOW_PARSE_REF(" ", cref) + SHOW_PARSE_TEXT(" = ") + SHOW_PARSE_REF("", aref) + char ach[4] = " "; + ach[1] = op; + SHOW_PARSE_TEXT(ach); + SHOW_PARSE_REF("", bref) + SHOW_PARSE_END + } + + // We have to do the trace in before/after mode; parser_op(a, a, op, a) + // is a legitimate call. + TRACE1 + { + TRACE1_HEADER + char ach[4] = " "; + ach[1] = op; + TRACE1_TEXT_ABC("operation is \"", ach, "\"") + TRACE1_INDENT + TRACE1_REFER("operand A: ", aref, "") + TRACE1_INDENT + TRACE1_REFER("operand B: ", bref, "") + TRACE1_INDENT + TRACE1_TEXT_ABC("result will be ", cref.field->name, "") + TRACE1_END + } + + struct cbl_num_result_t for_call = {}; + for_call.rounded = truncation_e; + for_call.refer = cref; + + switch(op) + { + case '+': + { + cbl_refer_t A[2]; + A[0] = aref; + A[1] = bref; + parser_add( 1, &for_call, + 2, A, + giving_e, + NULL, + NULL, + compute_error ); + break; + } + + case '-': + { + cbl_refer_t A[1]; + cbl_refer_t B[1]; + A[0] = bref; + B[0] = aref; + // Yes, the A-ness and B-ness are not really consistent + parser_subtract(1, &for_call, + 1, A, + 1, B, + giving_e, + NULL, + NULL, + compute_error ); + break; + } + + case '*': + { + cbl_refer_t A[1]; + cbl_refer_t B[1]; + A[0] = bref; + B[0] = aref; + parser_multiply(1, &for_call, + 1, A, + 1, B, + NULL, + NULL, + compute_error ); + break; + } + + case '/': + { + cbl_refer_t A[1]; + cbl_refer_t B[1]; + A[0] = aref; + B[0] = bref; + parser_divide(1, &for_call, + 1, A, + 1, B, + NULL, + NULL, + NULL, + compute_error ); + break; + } + + case '^': + { + arithmetic_operation( 1, &for_call, + 1, &aref, + 1, &bref, + no_giving_e, + NULL, + NULL, + compute_error, + "__gg__pow", + NULL); + break; + } + default: + cbl_internal_error( "parser_op() doesn't know how to " + "evaluate \"%s = %s %c %s\"\n", + cref.field->name, + aref.field->name, + op, + bref.field->name); + break; + } + } + +void +parser_subtract(size_t nC, cbl_num_result_t *C, // C = B - A + size_t nA, cbl_refer_t *A, + size_t nB, cbl_refer_t *B, + cbl_arith_format_t format, + cbl_label_t *error, + cbl_label_t *not_error, + void *compute_error_p ) // Cast this to a tree / int * + { + Analyze(); + SHOW_PARSE + { + SHOW_PARSE_HEADER + fprintf(stderr, " A[%ld]:", nA); + for(size_t i=0; i 0) + { + fprintf(stderr, ","); + } + fprintf(stderr, "%s", A[i].field->name); + } + + fprintf(stderr, " B[%ld]:", nB); + for(size_t i=0; i 0) + { + fprintf(stderr, ","); + } + fprintf(stderr, "%s", B[i].field->name); + } + + fprintf(stderr, " C[%ld]:", nC); + for(size_t i=0; i 0) + { + fprintf(stderr, ","); + } + fprintf(stderr, "%s", C[i].refer.field->name); + } + + SHOW_PARSE_END + } + + // We are going to look for configurations that allow us to do binary + // arithmetic and quickly assign the results: + + // no_giving_e is format 1; giving_e is format 2. + + bool handled = false; + + tree compute_error = (tree)compute_error_p; + if( compute_error == NULL ) + { + gg_assign(var_decl_default_compute_error, integer_zero_node); + compute_error = gg_get_address_of(var_decl_default_compute_error); + } + + if( fast_subtract(nC, C, + nA, A, + nB, B, + format) ) + { + handled = true; + } + else + { + bool computation_is_float = is_somebody_float(nA, A) + || is_somebody_float(nC, C); + + // We now start deciding which arithmetic routine we are going to use: + + if( computation_is_float ) + { + switch( format ) + { + case no_giving_e: + { + // Float format 1 + + set_up_arithmetic_error_handler(error, + not_error); + // Do phase 1, which calculates the subtotal and puts it into a + // temporary location + arithmetic_operation( 0, NULL, + nA, A, + 0, NULL, + format, + error, + not_error, + compute_error, + "__gg__add_float_phase1"); + + // Do phase 2, which subtracts the subtotal from each target in turn + for(size_t i=0; icurrent_filename; + +tree var_decl_exception_code; // int __gg__exception_code; +tree var_decl_exception_handled; // int __gg__exception_handled; +tree var_decl_exception_file_number; // int __gg__exception_file_number; +tree var_decl_exception_file_status; // int __gg__exception_file_status; +tree var_decl_exception_file_name; // const char *__gg__exception_file_name; +tree var_decl_exception_statement; // const char *__gg__exception_statement; +tree var_decl_exception_source_file; // const char *__gg__exception_source_file; +tree var_decl_exception_line_number; // int __gg__exception_line_number; +tree var_decl_exception_program_id; // const char *__gg__exception_program_id; +tree var_decl_exception_section; // const char *__gg__exception_section; +tree var_decl_exception_paragraph; // const char *__gg__exception_paragraph; + +tree var_decl_default_compute_error; // int __gg__default_compute_error; +tree var_decl_rdigits; // int __gg__rdigits; +tree var_decl_odo_violation; // int __gg__odo_violation; +tree var_decl_unique_prog_id; // size_t __gg__unique_prog_id; + +tree var_decl_entry_location; // This is for managing ENTRY statements +tree var_decl_exit_address; // This is for implementing pseudo_return_pop + +tree var_decl_call_parameter_signature; // char *__gg__call_parameter_signature +tree var_decl_call_parameter_count; // int __gg__call_parameter_count +tree var_decl_call_parameter_lengths; // size_t *__gg__call_parameter_count + +tree var_decl_return_code; // short __gg__data_return_code + +tree var_decl_arithmetic_rounds_size; // size_t __gg__arithmetic_rounds_size; +tree var_decl_arithmetic_rounds; // int* __gg__arithmetic_rounds; +tree var_decl_fourplet_flags_size; // size_t __gg__fourplet_flags_size; +tree var_decl_fourplet_flags; // int* __gg__fourplet_flags; + +tree var_decl_treeplet_1f; // cblc_field_pp_type_node , "__gg__treeplet_1f" +tree var_decl_treeplet_1o; // SIZE_T_P , "__gg__treeplet_1o" +tree var_decl_treeplet_1s; // SIZE_T_P , "__gg__treeplet_1s" +tree var_decl_treeplet_2f; // cblc_field_pp_type_node , "__gg__treeplet_2f" +tree var_decl_treeplet_2o; // SIZE_T_P , "__gg__treeplet_2o" +tree var_decl_treeplet_2s; // SIZE_T_P , "__gg__treeplet_2s" +tree var_decl_treeplet_3f; // cblc_field_pp_type_node , "__gg__treeplet_3f" +tree var_decl_treeplet_3o; // SIZE_T_P , "__gg__treeplet_3o" +tree var_decl_treeplet_3s; // SIZE_T_P , "__gg__treeplet_3s" +tree var_decl_treeplet_4f; // cblc_field_pp_type_node , "__gg__treeplet_4f" +tree var_decl_treeplet_4o; // SIZE_T_P , "__gg__treeplet_4o" +tree var_decl_treeplet_4s; // SIZE_T_P , "__gg__treeplet_4s" + +// There are times when I need to insert a NOP into the code, mainly to force +// a .loc directive into the assembly language so that the GDB-COBOL debugger +// can show the COBOL source code. This is true, for example, the CONTINUE +// statement which otherwise would produce no assembly language. Since I +// wasn't successful figuring out how to create an actual NOP assembly language +// instruction, I instead gg_assign(var_decl_nop, integer_zero_node) +tree var_decl_nop; // int __gg__nop; +tree var_decl_main_called; // int __gg__main_called; + +int +get_scaled_rdigits(cbl_field_t *field) + { + int retval; + if( !(field->attr & scaled_e) ) + { + // The value is not P-scaled, so we just use the unchanged rdigits value + retval = field->data.rdigits; + } + else + { + if( field->data.rdigits < 0 ) + { + // The PIC string was something like 999PPPP, which means an rdigits value + // of -4. We return zero; somebody else will have the job of multiplying + // the three significant digits by 10^4 to get the magnitude correct. + retval = 0; + } + else + { + // The PIC string was something like PPPP999, which means an rdigits value + // of +4. We return an rdigits value of 4 + 3 = 7, which will mean that + // the three significant digits will be scaled to 0.0000999 + retval = field->data.digits + field->data.rdigits; + } + } + return retval; + } + +int +get_scaled_digits(cbl_field_t *field) + { + int retval; + if( !(field->attr & scaled_e) ) + { + // The value is not P-scaled, so we just use the unchanged rdigits value + retval = field->data.digits; + } + else + { + if( field->data.rdigits < 0 ) + { + // The PIC string was something like 999PPPP, which means an rdigits value + // of -4. digits is 3, reflecting the 9(3). We return seven, reflecting + // that all of the final digits are to the left of the decimal point + retval = field->data.digits - field->data.rdigits; + } + else + { + // The PIC string was something like PPPP999, which means an rdigits value + // of +4. We return and rdigits value of 4 + 3 = 7, which will mean that + // the three significant digits will be scaled to 0.0000999 and all of the + // seven digits are to the left of the decimal point + retval = field->data.digits + field->data.rdigits; + } + } + return retval; + } + +tree +tree_type_from_digits(size_t digits, int signable) + { + tree retval = NULL_TREE; + + if( signable ) + { + if(digits <= 2 ) + { + retval = CHAR; + } + else if (digits <= 4 ) + { + retval = SHORT; + } + else if (digits <= 9 ) + { + retval = INT; + } + else if (digits <= 18 ) + { + retval = LONGLONG; + } + else + { + retval = INT128; + } + } + else + { + if(digits <= 2 ) + { + retval = UCHAR; + } + else if (digits <= 4 ) + { + retval = USHORT; + } + else if (digits <= 9 ) + { + retval = UINT; + } + else if (digits <= 18 ) + { + retval = ULONGLONG; + } + else + { + retval = UINT128; + } + } + return retval; + } + +void +get_integer_value(tree value, + cbl_field_t *field, + tree offset, + bool check_for_fractional_digits) + { + Analyze(); + // Call this routine when you know the result has to be an integer with no + // rdigits. This routine became necessary the first time I saw an + // intermediate value for an array subscript: table((3 + 1) / 2)) + // + // If the field_i has rdigits, and if any of those rdigits are non-zero, we + // return a 1 so that our caller can decide what to do. + + static tree temp = gg_define_variable(INT128, "..giv_temp", vs_file_static); + static tree rdigits = gg_define_variable(INT, "..giv_rdigits", vs_file_static); + + if( field->attr & intermediate_e ) + { + // Get the binary value, which for 99V99 can be 1234, meaning 12.34 + get_binary_value(temp, NULL, field, offset); + + // Pick up the run-time number of rdigits: + gg_assign(rdigits, gg_cast(INT, member(field, "rdigits"))); + + // Scale by the number of rdigits, which turns 12.34 into 12. + // When check_for_fractional_digits is true, __gg__rdigits will be set + // to 1 for 12.34, and will be set to zero 12.00 + scale_by_power_of_ten(temp, + gg_negate(rdigits), + check_for_fractional_digits); + } + else + { + get_binary_value(temp, rdigits, field, offset); + scale_by_power_of_ten_N(temp, + -get_scaled_rdigits(field), + check_for_fractional_digits); + } + gg_assign(value, gg_cast(TREE_TYPE(value), temp)); + } + +static tree +get_data_offset_dest(cbl_refer_t &refer, + int *pflags = NULL) + { + Analyze(); + // This routine returns a tree which is the size_t offset to the data in the + // refer/field + + // Because this is for destination/receiving variables, OCCURS DEPENDING ON + // is not checked. + + tree retval = gg_define_variable(SIZE_T); + gg_assign(retval, size_t_zero_node); + + // We have a refer. + // At the very least, we have an constant offset + int all_flags = 0; + int all_flag_bit = 1; + + static tree value64 = gg_define_variable(LONG, ".._gdod_value64", vs_file_static); + + if( refer.nsubscript ) + { + // We have at least one subscript: + + // Figure we have three subscripts, so nsubscript is 3 + // Figure that the subscripts are {5, 4, 3} + + // We expect that starting from refer.field, that three of our ancestors -- + // call them A1, A2, and A3 -- have occurs clauses. + + // We need to start with the rightmost subscript, and work our way up through + // our parents. As we find each parent with an OCCURS, we increment qual_data + // by (subscript-1)*An->data.capacity + + // Establish the field_t pointer for walking up through our ancestors: + cbl_field_t *parent = refer.field; + + // Note the backwards test, because refer->nsubscript is an unsigned value + for(size_t i=refer.nsubscript-1; ioccurs.ntimes() ) + { + break; + } + parent = parent_of(parent); + } + // we might have an error condition at this point: + if( !parent ) + { + cbl_internal_error("Too many subscripts"); + } + // Pick up the integer value of the subscript: + static tree subscript = gg_define_variable(LONG, "..gdod_subscript", vs_file_static); + + if( process_this_exception(ec_bound_subscript_e) ) + { + get_integer_value(value64, + refer.subscripts[i].field, + refer_offset_dest(refer.subscripts[i]), + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, + ne_op, + integer_zero_node ) + { + if( enabled_exceptions.match(ec_bound_subscript_e) ) + { + // The subscript isn't an integer + SET_EXCEPTION_CODE(ec_bound_subscript_e); + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), integer_zero_node)); + } + else + { + rt_error("error: a table subscript is not an integer"); + } + } + ELSE + { + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), value64)); + } + ENDIF + } + else + { + get_integer_value(subscript, + refer.subscripts[i].field, + refer_offset_dest(refer.subscripts[i])); + } + + // gg_printf("%s(): We have a subscript of %d from %s\n", + // gg_string_literal(__func__), + // subscript, + // gg_string_literal(refer.subscripts[i].field->name), + // NULL_TREE); + + if( (refer.subscripts[i].field->attr & FIGCONST_MASK) == zero_value_e ) + { + // This refer is a figconst ZERO; we treat it as an ALL ZERO + // This is our internal representation for ALL, as in TABLE(ALL) + + // Set the subscript to 1 + gg_assign(subscript, + build_int_cst_type( TREE_TYPE(subscript), 1)); + // Flag this position as ALL + all_flags |= all_flag_bit; + } + all_flag_bit <<= 1; + + // Subscript is now a one-based integer + // Make it zero-based: + + gg_decrement(subscript); + if( process_this_exception(ec_bound_subscript_e) ) + { + // gg_printf("process_this_exception is true\n", NULL_TREE); + IF( subscript, lt_op, gg_cast(TREE_TYPE(subscript), integer_zero_node) ) + { + // The subscript is too small + SET_EXCEPTION_CODE(ec_bound_subscript_e); + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), integer_zero_node)); + } + ELSE + { + // gg_printf("parent->occurs.ntimes() is %d\n", build_int_cst_type(INT, parent->occurs.ntimes()), NULL_TREE); + IF( subscript, + ge_op, + build_int_cst_type(TREE_TYPE(subscript), parent->occurs.ntimes()) ) + { + // The subscript is too large + if( enabled_exceptions.match(ec_bound_subscript_e) ) + { + SET_EXCEPTION_CODE(ec_bound_subscript_e); + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), integer_zero_node)); + } + else + { + rt_error("error: table subscript is too large"); + } + } + ELSE + { + // We have a good subscript: + // Check for an ODO violation: + if( parent->occurs.depending_on ) + { + cbl_field_t *depending_on = cbl_field_of(symbol_at(parent->occurs.depending_on)); + get_integer_value(value64, depending_on); + IF( subscript, ge_op, value64 ) + { + gg_assign(var_decl_odo_violation, integer_one_node); + } + ELSE + ENDIF + } + + tree augment = gg_multiply(subscript, build_int_cst_type(INT, parent->data.capacity)); + gg_assign(retval, gg_add(retval, gg_cast(SIZE_T, augment))); + } + ENDIF + } + ENDIF + } + else + { + // Assume a good subscript: + // Check for an ODO violation: + if( parent->occurs.depending_on ) + { + cbl_field_t *depending_on = cbl_field_of(symbol_at(parent->occurs.depending_on)); + get_integer_value(value64, depending_on); + IF( subscript, ge_op, value64 ) + { + gg_assign(var_decl_odo_violation, integer_one_node); + } + ELSE + ENDIF + } + tree augment = gg_multiply(subscript, build_int_cst_type(INT, parent->data.capacity)); + gg_assign(retval, gg_add(retval, gg_cast(SIZE_T, augment))); + } + parent = parent_of(parent); + } + } + + if( refer.refmod.from ) + { + // We have a refmod to deal with + static tree refstart = gg_define_variable(LONG, "..gdos_refstart", vs_file_static); + + if( process_this_exception(ec_bound_ref_mod_e) ) + { + get_integer_value(value64, + refer.refmod.from->field, + refer_offset_source(*refer.refmod.from), + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, + ne_op, + integer_zero_node ) + { + // refmod offset is not an integer, and has to be + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_one_node)); + } + else + { + rt_error("error: a refmod FROM is not an integer"); + } + } + ELSE + gg_assign(refstart, value64); + ENDIF + } + else + { + get_integer_value(value64, + refer.refmod.from->field, + refer_offset_source(*refer.refmod.from) + ); + gg_assign(refstart, value64); + } + + // Make refstart zero-based: + gg_decrement(refstart); + + if( process_this_exception(ec_bound_ref_mod_e) ) + { + IF( refstart, lt_op, gg_cast(LONG, integer_zero_node) ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + } + else + { + rt_error("error: refmod FROM is less than one"); + } + } + ELSE + { + IF( refstart, gt_op, build_int_cst_type(LONG, refer.field->data.capacity) ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + } + else + { + rt_error("error: refmod FROM is too large"); + } + } + ELSE + ENDIF + } + ENDIF + } + + // We have a good refstart + gg_assign(retval, gg_add(retval, gg_cast(SIZE_T, refstart))); + } + + if( pflags ) + { + *pflags = all_flags; + } + +// gg_printf("*****>>>>> %s(): returning %p\n", +// gg_string_literal(__func__), +// retval, +// NULL_TREE); + return retval; + } + +static tree +get_data_offset_source(cbl_refer_t &refer, + int *pflags = NULL) + { + Analyze(); + // This routine returns a tree which is the size_t offset to the data in the + // refer/field + + // Because this is for source / sending variables, checks are made for + // OCCURS DEPENDING ON violations (when those exceptions are enabled) + + tree retval = gg_define_variable(SIZE_T); + gg_assign(retval, size_t_zero_node); + + // We have a refer. + // At the very least, we have an constant offset + int all_flags = 0; + int all_flag_bit = 1; + + static tree value64 = gg_define_variable(LONG, ".._gdos_value64", vs_file_static); + + if( refer.nsubscript ) + { + // We have at least one subscript: + + // Figure we have three subscripts, so nsubscript is 3 + // Figure that the subscripts are {5, 4, 3} + + // We expect that starting from refer.field, that three of our ancestors -- + // call them A1, A2, and A3 -- have occurs clauses. + + // We need to start with the rightmost subscript, and work our way up through + // our parents. As we find each parent with an OCCURS, we increment qual_data + // by (subscript-1)*An->data.capacity + + // Establish the field_t pointer for walking up through our ancestors: + cbl_field_t *parent = refer.field; + + // Note the backwards test, because refer->nsubscript is an unsigned value + for(size_t i=refer.nsubscript-1; ioccurs.ntimes() ) + { + break; + } + parent = parent_of(parent); + } + // we might have an error condition at this point: + if( !parent ) + { + cbl_internal_error("Too many subscripts"); + } + // Pick up the integer value of the subscript: +// static tree subscript = gg_define_variable(LONG, "..gdos_subscript", vs_file_static); + tree subscript = gg_define_variable(LONG); + + if( process_this_exception(ec_bound_subscript_e) ) + { + get_integer_value(value64, + refer.subscripts[i].field, + refer_offset_source(refer.subscripts[i]), + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, + ne_op, + integer_zero_node ) + { + if( enabled_exceptions.match(ec_bound_subscript_e) ) + { + // The subscript isn't an integer + SET_EXCEPTION_CODE(ec_bound_subscript_e); + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), integer_zero_node)); + } + else + { + rt_error("error: a table subscript is not an integer"); + } + } + ELSE + { + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), value64)); + } + ENDIF + } + else + { + get_integer_value(subscript, + refer.subscripts[i].field, + refer_offset_source(refer.subscripts[i])); + } + + // gg_printf("%s(): We have a subscript of %d from %s\n", + // gg_string_literal(__func__), + // subscript, + // gg_string_literal(refer.subscripts[i].field->name), + // NULL_TREE); + + if( (refer.subscripts[i].field->attr & FIGCONST_MASK) == zero_value_e ) + { + // This refer is a figconst ZERO; we treat it as an ALL ZERO + // This is our internal representation for ALL, as in TABLE(ALL) + + // Set the subscript to 1 + gg_assign(subscript, + build_int_cst_type( TREE_TYPE(subscript), 1)); + // Flag this position as ALL + all_flags |= all_flag_bit; + } + all_flag_bit <<= 1; + + // Subscript is now a one-based integer + // Make it zero-based: + + gg_decrement(subscript); + if( process_this_exception(ec_bound_subscript_e) ) + { + // gg_printf("process_this_exception is true\n", NULL_TREE); + IF( subscript, lt_op, gg_cast(TREE_TYPE(subscript), integer_zero_node) ) + { + // The subscript is too small + SET_EXCEPTION_CODE(ec_bound_subscript_e); + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), integer_zero_node)); + } + ELSE + { + // gg_printf("parent->occurs.ntimes() is %d\n", build_int_cst_type(INT, parent->occurs.ntimes()), NULL_TREE); + IF( subscript, + ge_op, + build_int_cst_type(TREE_TYPE(subscript), parent->occurs.ntimes()) ) + { + // The subscript is too large + if( enabled_exceptions.match(ec_bound_subscript_e) ) + { + SET_EXCEPTION_CODE(ec_bound_subscript_e); + gg_assign(subscript, gg_cast(TREE_TYPE(subscript), integer_zero_node)); + } + else + { + rt_error("error: table subscript is too large"); + } + } + ELSE + { + // We have a good subscript: + // Check for an ODO violation: + if( parent->occurs.depending_on ) + { + cbl_field_t *depending_on = cbl_field_of(symbol_at(parent->occurs.depending_on)); + get_integer_value(value64, depending_on); + IF( subscript, ge_op, value64 ) + { + gg_assign(var_decl_odo_violation, integer_one_node); + } + ELSE + ENDIF + } + + tree augment = gg_multiply(subscript, build_int_cst_type(INT, parent->data.capacity)); + gg_assign(retval, gg_add(retval, gg_cast(SIZE_T, augment))); + } + ENDIF + } + ENDIF + } + else + { + // Assume a good subscript: + // Check for an ODO violation: + if( parent->occurs.depending_on ) + { + cbl_field_t *depending_on = cbl_field_of(symbol_at(parent->occurs.depending_on)); + get_integer_value(value64, depending_on); + IF( subscript, ge_op, value64 ) + { + gg_assign(var_decl_odo_violation, integer_one_node); + } + ELSE + ENDIF + } + tree augment = gg_multiply(subscript, build_int_cst_type(INT, parent->data.capacity)); + gg_assign(retval, gg_add(retval, gg_cast(SIZE_T, augment))); + } + parent = parent_of(parent); + } + } + + if( refer.refmod.from ) + { + // We have a refmod to deal with + static tree refstart = gg_define_variable(LONG, "..gdo_refstart", vs_file_static); + + if( process_this_exception(ec_bound_ref_mod_e) ) + { + get_integer_value(value64, + refer.refmod.from->field, + refer_offset_source(*refer.refmod.from), + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, + ne_op, + integer_zero_node ) + { + // refmod offset is not an integer, and has to be + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_one_node)); + } + else + { + rt_error("error: a refmod FROM is not an integer"); + } + } + ELSE + gg_assign(refstart, value64); + ENDIF + } + else + { + get_integer_value(value64, + refer.refmod.from->field, + refer_offset_source(*refer.refmod.from) + ); + gg_assign(refstart, value64); + } + + // Make refstart zero-based: + gg_decrement(refstart); + + if( process_this_exception(ec_bound_ref_mod_e) ) + { + IF( refstart, lt_op, gg_cast(LONG, integer_zero_node) ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + } + else + { + rt_error("error: refmod FROM is less than one"); + } + } + ELSE + { + IF( refstart, gt_op, build_int_cst_type(LONG, refer.field->data.capacity) ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + } + else + { + rt_error("error: refmod FROM is too large"); + } + } + ELSE + ENDIF + } + ENDIF + } + + // We have a good refstart + gg_assign(retval, gg_add(retval, gg_cast(SIZE_T, refstart))); + } + + if( pflags ) + { + *pflags = all_flags; + } + + +// gg_printf("*****>>>>> %s(): returning %p\n", +// gg_string_literal(__func__), +// retval, +// NULL_TREE); + return retval; + } + +void +get_binary_value( tree value, + tree rdigits, + cbl_field_t *field, + tree field_offset, + tree hilo + ) + { + Analyze(); + if( hilo ) + { + gg_assign(hilo, integer_zero_node); + } + + bool needs_scaling = true; + static const bool debugging=false; + + // Very special case: + if( strcmp(field->name, "ZEROS") == 0 ) + { + gg_assign(value, gg_cast(TREE_TYPE(value), integer_zero_node)); + if( rdigits ) + { + gg_assign(rdigits, gg_cast(TREE_TYPE(rdigits), integer_zero_node)); + } + return; + } + + static tree pointer = gg_define_variable(UCHAR_P, "..gbv_pointer", vs_file_static); + static tree pend = gg_define_variable(UCHAR_P, "..gbv_pend", vs_file_static); + + switch(field->type) + { + case FldLiteralN: + { + if( SCALAR_FLOAT_TYPE_P(value) ) + { + gg_assign(value, gg_cast(TREE_TYPE(value), field->literal_decl_node)); + } + else + { + if( rdigits ) + { + gg_assign(rdigits, build_int_cst_type(TREE_TYPE(rdigits), + field->data.rdigits)); + } + tree dest_type = TREE_TYPE(value); + tree source_type = tree_type_from_field(field); + + gg_assign(value, + gg_cast(dest_type, + gg_indirect( gg_cast(build_pointer_type(source_type), + gg_get_address_of(field->data_decl_node))))); + } + + break; + } + + case FldNumericDisplay: + { + Analyzer.Message("FldNumericDisplay"); + // Establish the source + tree source_address = get_data_address(field, field_offset); + + // We need to check early on for HIGH-VALUE and LOW-VALUE + // Pick up the byte + tree digit = gg_get_indirect_reference(source_address, NULL_TREE); + IF( digit, eq_op, build_int_cst(UCHAR, 0xFF) ) + { + if( hilo ) + { + gg_assign(hilo, integer_one_node); + } + if( rdigits ) + { + gg_assign(rdigits, + build_int_cst_type( TREE_TYPE(rdigits), + get_scaled_rdigits(field))); + } + gg_assign(value, build_int_cst_type(TREE_TYPE(value), 0xFFFFFFFFFFFFFFFUL)); + } + ELSE + { + IF( digit, eq_op, build_int_cst(UCHAR, 0x00) ) + { + if( hilo ) + { + gg_assign(hilo, integer_minus_one_node); + } + } + ELSE + { + // Establish rdigits: + if( rdigits ) + { + gg_assign(rdigits, + build_int_cst_type( TREE_TYPE(rdigits), + get_scaled_rdigits(field))); + } + // Zero out the destination + gg_assign(value, gg_cast(TREE_TYPE(value), integer_zero_node)); + // Pick up a pointer to the source bytes: + + gg_assign(pointer, source_address); + + // This is the we-are-done pointer + gg_assign(pend, gg_add( pointer, + build_int_cst_type(SIZE_T, field->data.capacity))); + + static tree signbyte = gg_define_variable(UCHAR, "..gbv_signbyte", vs_file_static); + + // The big decision is whether or not the variable is signed: + if( field->attr & signable_e ) + { + // The variable is signed + if( field->attr & separate_e ) + { + // The sign byte is separate + if( field->attr & leading_e) + { + // The first byte is '+' or '-' + gg_increment(pointer); + } + else + { + // The final byte is '+' or '-' + gg_decrement(pend); + } + } + else + { + // The sign byte is internal + if( field->attr & leading_e) + { + // The first byte has the sign bit: + gg_assign(signbyte, + gg_get_indirect_reference(source_address, NULL_TREE)); + if( internal_codeset_is_ebcdic() ) + { + // We need to make sure the EBCDIC sign bit is ON, for positive + gg_assign(gg_get_indirect_reference(source_address, NULL_TREE), + gg_bitwise_or(signbyte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); + } + else + { + // We need to make sure the ascii sign bit is Off, for positive + gg_assign(gg_get_indirect_reference(source_address, NULL_TREE), + gg_bitwise_and( signbyte, + build_int_cst_type( UCHAR, + ~NUMERIC_DISPLAY_SIGN_BIT))); + } + } + else + { + // The final byte has the sign bit: + gg_assign(signbyte, + gg_get_indirect_reference(source_address, + build_int_cst_type(SIZE_T, + field->data.capacity-1))); + if( internal_codeset_is_ebcdic() ) + { + // We need to make sure the EBCDIC sign bit is ON, for positive + gg_assign(gg_get_indirect_reference(source_address, + build_int_cst_type( SIZE_T, + field->data.capacity-1)), + gg_bitwise_or(signbyte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); + } + else + { + // We need to make sure the ASCII sign bit is Off, for positive + gg_assign(gg_get_indirect_reference(source_address, + build_int_cst_type( SIZE_T, + field->data.capacity-1)), + gg_bitwise_and( signbyte, + build_int_cst_type( UCHAR, + ~NUMERIC_DISPLAY_SIGN_BIT))); + } + } + } + } + // We can now set up the byte-by-byte processing loop: + if( internal_codeset_is_ebcdic() ) + { + // We are working in EBCDIC + WHILE( pointer, lt_op, pend ) + { + // Pick up the byte + digit = gg_get_indirect_reference(pointer, NULL_TREE); + IF( digit, lt_op, build_int_cst_type(UCHAR, EBCDIC_ZERO) ) + { + // break on a non-digit + gg_assign(pointer, pend); + } + ELSE + { + IF( digit, gt_op, build_int_cst_type(UCHAR, EBCDIC_NINE) ) + { + // break on a non-digit + gg_assign(pointer, pend); + } + ELSE + { + // Whether ASCII or EBCDIC, the bottom four bits tell the tale: + // Multiply our accumulator by ten: + gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); + // And add in the current digit + gg_assign(value, + gg_add(value, gg_cast(TREE_TYPE(value), gg_bitwise_and( digit, + build_int_cst_type(UCHAR, 0x0F) )))); + gg_increment(pointer); + } + ENDIF + } + ENDIF + } + WEND + } + else + { + // We are working in ASCII: + WHILE( pointer, lt_op, pend ) + { + // Pick up the byte + digit = gg_get_indirect_reference(pointer, NULL_TREE); + // Whether ASCII or EBCDIC, the bottom four bits tell the tale: + // Multiply our accumulator by ten: + gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); + // And add in the current digit + gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_bitwise_and(digit, build_int_cst_type(UCHAR, 0x0F))))); + gg_increment(pointer); + } + WEND + } + + // Value contains the binary value. The last thing is to apply -- and + // undo -- the signable logic: + + if( field->attr & signable_e ) + { + // The variable is signed + if( field->attr & separate_e ) + { + // The sign byte is separate + if( field->attr & leading_e) + { + // The first byte is '+' or '-' + if( internal_codeset_is_ebcdic() ) + { + // We are operating in EBCDIC, so we look for a 96 (is minus sign) + IF( gg_get_indirect_reference(source_address, NULL_TREE), + eq_op, + build_int_cst_type(UCHAR, 96) ) + { + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + else + { + // We are operating in ASCII + IF( gg_get_indirect_reference(source_address, NULL_TREE), + eq_op, + build_int_cst_type(UCHAR, '-') ) + { + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + } + else + { + // The final byte is '+' or '-' + if( internal_codeset_is_ebcdic() ) + { + // We are operating in EBCDIC, so we look for a 96 (is minus sign) + IF( gg_get_indirect_reference(source_address, build_int_cst_type(SIZE_T, field->data.capacity-1)), + eq_op, + build_int_cst_type(UCHAR, 96) ) + { + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + else + { + // We are operating in ASCII + IF( gg_get_indirect_reference(source_address, build_int_cst_type(SIZE_T, field->data.capacity-1)), + eq_op, + build_int_cst_type(UCHAR, '-') ) + { + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + } + } + else + { + // The sign byte is internal. Check the sign bit + if(internal_codeset_is_ebcdic()) + { + IF( gg_bitwise_and( signbyte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), eq_op, build_int_cst_type(UCHAR, 0) ) + { + // The EBCDIC sign bit was OFF, so negate the result + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + else + { + IF( gg_bitwise_and( signbyte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), ne_op, build_int_cst_type(UCHAR, 0) ) + { + // The ASCII sign bit was on, so negate the result + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + // It's time to put back the original data: + if( field->attr & leading_e) + { + // The first byte has the sign bit: + gg_assign(gg_get_indirect_reference(source_address, NULL_TREE), + signbyte); + } + else + { + // The final byte has the sign bit: + gg_assign(gg_get_indirect_reference(source_address, + build_int_cst_type(SIZE_T, field->data.capacity-1)), + signbyte); + } + } + } + } + ENDIF + } + ENDIF + + break; + } + + case FldNumericBinary: + { + // As of this writing, the source value is big-endian + // We have to convert it to a little-endian destination. + tree dest = gg_cast(build_pointer_type(UCHAR), gg_get_address_of(value)); + tree source = get_data_address(field, field_offset); + + size_t dest_nbytes = gg_sizeof(value); + size_t source_nbytes = field->data.capacity; + + if( debugging ) + { + gg_printf("dest_bytes/source_bytes %ld/%ld\n", + build_int_cst_type(SIZE_T, dest_nbytes), + build_int_cst_type(SIZE_T, source_nbytes), + NULL_TREE); + gg_printf("Starting value: ", NULL_TREE); + hex_dump(source, source_nbytes); + gg_printf("\n", NULL_TREE); + } + + if( dest_nbytes <= source_nbytes ) + { + // Destination is too small. We will move what we can, throwing away + // the most significant source bytes: + for(size_t i=0; iattr & signable_e ) + { + IF( gg_array_value(gg_cast(build_pointer_type(SCHAR), source)), lt_op, gg_cast(SCHAR, integer_zero_node) ) + { + gg_assign(extension, build_int_cst_type(UCHAR, 0xFF)); + } + ELSE + { + gg_assign(extension, build_int_cst_type(UCHAR, 0)); + } + ENDIF + } + else + { + gg_assign(extension, build_int_cst_type(UCHAR, 0)); + } + + // Flip the source end-for-end and put it into the dest: + size_t i=0; + while(i < source_nbytes) + { + gg_assign(gg_array_value(dest, i), + gg_array_value(source, source_nbytes-1-i) ); + i += 1; + } + // Fill the extra high-end bytes with 0x00 or 0xFF extension + + while(i < dest_nbytes) + { + gg_assign(gg_array_value(dest, i), + extension); + i += 1; + } + } + if( debugging ) + { + gg_printf("Ending value: ", NULL_TREE); + hex_dump(dest, dest_nbytes); + gg_printf("\n", NULL_TREE); + } + break; + } + + case FldNumericBin5: + case FldIndex: + case FldPointer: + { + if( field->attr & intermediate_e ) + { + // It is a intermediate, so rdigits has to come from the run-time structure + if( rdigits ) + { + gg_assign(rdigits, + gg_cast( TREE_TYPE(rdigits), + member(field, "rdigits"))); + } + } + else + { + // It isn't an intermediate, so we can safely use field->rdigits + if( rdigits ) + { + gg_assign(rdigits, + build_int_cst_type( TREE_TYPE(rdigits), + get_scaled_rdigits(field))); + } + } + tree source_address = get_data_address(field, field_offset); + tree dest_type = TREE_TYPE(value); + tree source_type = tree_type_from_size( field->data.capacity, + field->attr & signable_e); + if( debugging && rdigits) + { + gg_printf("get_binary_value bin5 rdigits: %d\n", rdigits, NULL_TREE); + } + + gg_assign(value, + gg_cast(dest_type, + gg_indirect(gg_cast( build_pointer_type(source_type), + source_address )))); + break; + } + + case FldPacked: + { + // Zero out the destination: + gg_assign(value, gg_cast(TREE_TYPE(value), integer_zero_node)); + gg_assign(pointer, get_data_address(field, field_offset)); + gg_assign(pend, + gg_add(pointer, + build_int_cst_type(SIZE_T, field->data.capacity-1))); + + // Convert all but the last byte of the packed decimal sequence + WHILE( pointer, lt_op, pend ) + { + // Convert the first nybble + gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); + gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_rshift(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst(UINT, 4))))); + + // Convert the second nybble + gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); + gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_bitwise_and(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst_type(UCHAR, 0xF))))); + gg_increment(pointer); + } + WEND + + // This is the final byte: + gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); + gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_rshift(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst(UINT, 4))))); + + IF( gg_bitwise_and(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst_type(UCHAR, 0xF)), eq_op, build_int_cst_type(UCHAR, 0x0D) ) + { + gg_assign(value, gg_negate(value)); + } + ELSE + { + IF( gg_bitwise_and(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst_type(UCHAR, 0xF)), eq_op, build_int_cst_type(UCHAR, 0x0B) ) + { + gg_assign(value, gg_negate(value)); + } + ELSE + ENDIF + } + ENDIF + break; + } + + case FldFloat: + { + // We are going to assume that the float value contains an integer. + if( rdigits ) + { + gg_assign(rdigits, + gg_cast( TREE_TYPE(rdigits), integer_zero_node)); + } + gg_assign(value, + gg_cast(TREE_TYPE(value), + gg_call_expr( INT128, + "__gg__integer_from_float128", + gg_get_address_of(field->var_decl_node), + NULL_TREE))); + needs_scaling = false; + break; + } + + case FldAlphanumeric: + { + + } + + + default: + { + fprintf(stderr, "%s(): We know not how to" + " get a binary value from %s\n", + __func__, + cbl_field_type_str(field->type) ); + abort(); + break; + } + } + + if( needs_scaling ) + { + if( field->attr & scaled_e ) + { + if( field->data.rdigits < 0 ) + { + scale_by_power_of_ten_N(value, -field->data.rdigits); + } + } + } + } + +tree +tree_type_from_field(cbl_field_t *field) + { + gcc_assert(field); + return tree_type_from_size(field->data.capacity, field->attr & signable_e); + } + +tree +get_data_address( cbl_field_t *field, + tree offset) // Offset is SIZE_T + { + if( offset ) + { + return gg_cast( UCHAR_P, + gg_add( gg_cast(SIZE_T, + member( field->var_decl_node, + "data")), + offset)); + } + else + { + return member(field->var_decl_node, "data"); + } + } + +__int128 +get_power_of_ten(int n) + { + // 2** 64 = 1.8E19 + // 2**128 = 3.4E38 + __int128 retval = 1; + static const int MAX_POWER = 19 ; + static const __int128 pos[MAX_POWER+1] = + { + 1ULL, // 00 + 10ULL, // 01 + 100ULL, // 02 + 1000ULL, // 03 + 10000ULL, // 04 + 100000ULL, // 05 + 1000000ULL, // 06 + 10000000ULL, // 07 + 100000000ULL, // 08 + 1000000000ULL, // 09 + 10000000000ULL, // 10 + 100000000000ULL, // 11 + 1000000000000ULL, // 12 + 10000000000000ULL, // 13 + 100000000000000ULL, // 14 + 1000000000000000ULL, // 15 + 10000000000000000ULL, // 16 + 100000000000000000ULL, // 17 + 1000000000000000000ULL, // 18 + 10000000000000000000ULL, // 19 + }; + if( n < 0 || n>MAX_POWER*2) // The most we can handle is 10**38 + { + fprintf(stderr, "Trying to raise 10 to %d as an int128, which we can't do.\n", n); + fprintf(stderr, "The problem is in %s.\n", __func__); + abort(); + } + if( n <= MAX_POWER ) + { + // Up to 10**18 we do directly: + retval = pos[n]; + } + else + { + // 19 through 38 is handled in a second step, because when this was written, + // GCC couldn't handle __int128 constants: + retval = pos[n/2]; + retval *= retval; + if( n & 1 ) + { + retval *= 10; + } + } + return retval; + } + +void +scale_by_power_of_ten_N(tree value, + int N, + bool check_for_fractional) + { + // This routine is called when we know N at compile time. + + Analyze(); + Analyzer.Message("takes int N"); + if( N == 0 ) + { + if( check_for_fractional ) + { + gg_assign(var_decl_rdigits, integer_zero_node); + } + } + else if( N > 0 ) + { + if( check_for_fractional ) + { + gg_assign(var_decl_rdigits, integer_zero_node); + } + tree value_type = TREE_TYPE(value); + __int128 power_of_ten = get_power_of_ten(N); + gg_assign(value, gg_multiply(value, build_int_cst_type( value_type, + power_of_ten))); + } + if( N < 0 ) + { + tree value_type = TREE_TYPE(value); + __int128 power_of_ten = get_power_of_ten(-N); + if( check_for_fractional ) + { + IF( gg_mod(value, build_int_cst_type( value_type, + power_of_ten)), + ne_op, + gg_cast(value_type, integer_zero_node) ) + { + gg_assign(var_decl_rdigits, integer_one_node); + } + ELSE + gg_assign(var_decl_rdigits, integer_zero_node); + ENDIF + } + gg_assign(value, gg_divide(value, build_int_cst_type( value_type, + power_of_ten))); + } + } + +tree +scale_by_power_of_ten(tree value, + tree N, + bool check_for_fractional) + { + Analyze(); + static tree retval = gg_define_variable(INT, "..sbpot2_retval", vs_file_static); + + if( check_for_fractional ) + { + // Our caller expects us to return 1 if value was something like 99v99 and + // the fractional part was non-zero + gg_assign(value, + gg_cast(TREE_TYPE(value), + gg_call_expr(INT128, + "__gg__scale_by_power_of_ten_1", + gg_cast(INT128, value), + N, + NULL_TREE))); + } + else + { + // Our caller does not expect us to test for fractional values + gg_assign(value, + gg_cast(TREE_TYPE(value), + gg_call_expr(INT128, + "__gg__scale_by_power_of_ten_2", + gg_cast(INT128, value), + N, + NULL_TREE))); + + } + + gg_assign(retval, integer_zero_node); + return retval; + } + +void +scale_and_round(tree value, + int value_rdigits, + bool target_is_signable, + int target_rdigits, + cbl_round_t rounded) + { + if( !target_is_signable ) + { + // The target has to be positive, so take the absolute value of the input + gg_assign(value, gg_abs(value)); + } + + if( target_rdigits >= value_rdigits ) + { + // The value doesn't have enough rdigits. All we need to do is multiply it + // by a power of ten to get it right: + scale_by_power_of_ten_N(value, + target_rdigits - value_rdigits); + } + else + { + // The value has too few rdigits. + switch(rounded) + { + case nearest_away_from_zero_e: + { + // This is rounding away from zero + + // We want to adjust value so that the extra digit is in the units + // place: + scale_by_power_of_ten_N(value, + target_rdigits - value_rdigits + 1); + // Add five to the result: + IF( value, lt_op, gg_cast(TREE_TYPE(value), integer_zero_node) ) + { + gg_assign(value, + gg_add( value, + build_int_cst_type(TREE_TYPE(value), -5))); + } + ELSE + { + gg_assign(value, + gg_add( value, + build_int_cst_type(TREE_TYPE(value), +5))); + } + // And now get rid of the lowest decimal digit + scale_by_power_of_ten_N(value, -1); + + break; + } + + case truncation_e: + { + // Without rounding, just scale the result + scale_by_power_of_ten_N(value, target_rdigits - value_rdigits); + break; + } + default: + abort(); + break; + } + } + } + +void +hex_dump(tree data, size_t bytes) + { + gg_printf("0x", NULL_TREE); + for(size_t i=0; iattr & (intermediate_e)) ) + { + // This field can't have a DEPENDING ON + return false; + } + + // Check if there there is an occurs with a depending_on in the hierarchy + bool proceed = false; + cbl_field_t *odo = symbol_find_odo(refer.field); + cbl_field_t *depending_on; + if( odo && odo != refer.field ) + { + // We have an ODO and refer.field is not the ODO, so we can keep looking + depending_on = cbl_field_of(symbol_at(odo->occurs.depending_on)); + if( depending_on->var_decl_node ) + { + // The depending_on has been initialized + if( refer_type == refer_source ) + { + proceed = true; + } + else + { + // In ISO/IEC 1989:2023, "OCCURS 13.18.38.4 General rules", talks about the + // three situations we know how to deal with. + + // Rule 7) We need to detect if depending_on is completely independent + // of refer.field + cbl_field_t *p; + cbl_field_t *parent1 = refer.field; + while( (p = parent_of(parent1)) ) + { + parent1 = p; + } + cbl_field_t *parent2 = depending_on; + while( (p = parent_of(parent2)) ) + { + parent2 = p; + } + if( parent1 != parent2 ) + { + // refer.field and depending_on have two different ultimate parents, so + // Rule 7) applies, and we have to trim the destination according to + // depending_on + //gg_printf("Rule 7 applies\n", NULL_TREE); + proceed = true; + } + else + { + // Rule 7) doesn't apply, so we have to check Rule 8) + // In this case: + // 01 digtab. + // 05 depl pic 9. + // 05 digitgrp. + // 10 digits occurs 1 to 9 depending on depl pic x. + // MOVE ... TO digitgrp + // The DEPENDING ON variable depl is not subordinate to digitgrp, and + // consequently we have to trim according to depl: + if( depending_on->offset < refer.field->offset ) + { + // depending_on comes before refer.field, so rule 8a) applies + //gg_printf("Rule 8a) applies\n", NULL_TREE); + proceed = true; + } + else + { + // depending_on comes after refer.field, so Rule 8b) for receiving + // items applies, and we will not trim according to depending_on + //gg_printf("Rule 8b) applies\n", NULL_TREE); + } + } + } + } + } + return proceed; + } + +void +set_exception_code_func(ec_type_t ec, int /*line*/, int from_raise_statement) + { + if( ec ) + { + gg_call(VOID, + "__gg__set_exception_code", + build_int_cst_type(INT, ec), + build_int_cst_type(INT, from_raise_statement), + NULL_TREE); + } + else + { + gg_printf("set_exception_code: set it to ZERO\n", NULL_TREE); + gg_assign(var_decl_exception_code, integer_zero_node); + } + } + +bool +process_this_exception(ec_type_t ec) + { + bool retval; + if( enabled_exceptions.match(ec) || !skip_exception_processing ) + { + retval = true; + } + else + { + retval = false; + } + return retval; + } + +void +rt_error(const char *msg) + { + // Come here with a fatal run-time error message + char ach[256]; + snprintf( ach, sizeof(ach), "%s:%d: %s", + current_filename.back().c_str(), + CURRENT_LINE_NUMBER, + msg); + gg_printf("%s\n", gg_string_literal(ach), NULL_TREE); + gg_abort(); + } + +void +copy_little_endian_into_place(cbl_field_t *dest, + tree dest_offset, + tree value, + int rhs_rdigits, + bool check_for_error, + tree &size_error) + { + if( check_for_error ) + { + // We need to see if value can fit into destref + + // We do this by comparing value to 10^(lhs.ldigits + rhs_rdigits) + // Example: rhs is 123.45, whichis 12345 with rdigits 2 + // lhs is 99.999. So, lhs.digits is 5, and lhs.rdigits is 3. + // 10^(5 - 3 + 2) is 10^4, which is 10000. Because 12345 is >= 10000, the + // source can't fit into the destination. + + // Note: I am not trying to avoid the use of stack variables, because I am + // not sure how to declare a file-static variable of unknown type. + tree abs_value = gg_define_variable(TREE_TYPE(value)); + IF( value, lt_op, build_int_cst_type(TREE_TYPE(value), 0) ) + { + gg_assign(abs_value, gg_negate(value)); + } + ELSE + { + gg_assign(abs_value, value); + } + ENDIF + + __int128 power_of_ten = get_power_of_ten( dest->data.digits + - dest->data.rdigits + + rhs_rdigits ); + IF( gg_cast(INT128, abs_value), + ge_op, + build_int_cst_type(INT128, power_of_ten) ) + { + // Flag the size error + gg_assign(size_error, integer_one_node); + } + ELSE + ENDIF + } + scale_by_power_of_ten_N(value, dest->data.rdigits - rhs_rdigits); + + tree dest_type = tree_type_from_size( dest->data.capacity, + dest->attr & signable_e); + tree dest_pointer = gg_add(member(dest->var_decl_node, "data"), + dest_offset); + gg_assign(gg_indirect(gg_cast(build_pointer_type(dest_type), dest_pointer)), + gg_cast(dest_type, value)); + } + +void +build_array_of_treeplets( int ngroup, + size_t N, + cbl_refer_t *refers) + { + if( N ) + { + // At the present time the most this routine is called is four times, for + // the implementation of the UNSTRING verb. + + if( N > MIN_FIELD_BLOCK_SIZE ) + { + gg_call(VOID, + "__gg__resize_treeplet", + build_int_cst_type(INT, ngroup), + build_int_cst_type(SIZE_T, N), + NULL_TREE + ); + } + switch(ngroup) + { + case 1: + for(size_t i=0; ivar_decl_node) + : gg_cast(cblc_field_p_type_node, null_pointer_node)); + gg_assign(gg_array_value(var_decl_treeplet_1o, i), + refer_offset_source(refers[i])); + gg_assign(gg_array_value(var_decl_treeplet_1s, i), + refer_size_source(refers[i])); + } + break; + case 2: + for(size_t i=0; ivar_decl_node) + : gg_cast(cblc_field_p_type_node, null_pointer_node)); + gg_assign(gg_array_value(var_decl_treeplet_2o, i), + refer_offset_source(refers[i])); + gg_assign(gg_array_value(var_decl_treeplet_2s, i), + refer_size_source(refers[i])); + } + break; + case 3: + for(size_t i=0; ivar_decl_node) + : gg_cast(cblc_field_p_type_node, null_pointer_node)); + gg_assign(gg_array_value(var_decl_treeplet_3o, i), + refer_offset_source(refers[i])); + gg_assign(gg_array_value(var_decl_treeplet_3s, i), + refer_size_source(refers[i])); + } + break; + case 4: + for(size_t i=0; ivar_decl_node) + : gg_cast(cblc_field_p_type_node, null_pointer_node)); + gg_assign(gg_array_value(var_decl_treeplet_4o, i), + refer_offset_source(refers[i])); + gg_assign(gg_array_value(var_decl_treeplet_4s, i), + refer_size_source(refers[i])); + } + break; + default: + abort(); + break; + } + } + else + { + // Just do nothing + } + } + +void +build_array_of_fourplets( int ngroup, + size_t N, + cbl_refer_t *refers) + { + int flag_bits = 0; + if( N ) + { + if( N > MIN_FIELD_BLOCK_SIZE ) + { + gg_call(VOID, + "__gg__resize_treeplet", + build_int_cst_type(INT, ngroup), + build_int_cst_type(SIZE_T, N), + NULL_TREE); + + gg_call(VOID, + "__gg__resize_int_p", + gg_get_address_of(var_decl_fourplet_flags_size), + gg_get_address_of(var_decl_fourplet_flags), + build_int_cst_type(SIZE_T, N), + NULL_TREE); + } + + for(size_t i=0; ivar_decl_node)); + gg_assign(gg_array_value(var_decl_treeplet_1o, i), + refer_offset_source(refers[i], &flag_bits)); + gg_assign(gg_array_value(var_decl_treeplet_1s, i), + refer_size_source(refers[i])); + gg_assign(gg_array_value(var_decl_fourplet_flags, i), + build_int_cst_type(INT, flag_bits)); + } + } + else + { + abort(); + } + } + +tree +build_array_of_size_t( size_t N, + const size_t *values) + { + // We create and populate an array of size_t values + + // This only works because it is used in but one spot. If this routine is + // called twice, be careful about how the first one is used. It's a static + // variable, you see. + static tree values_p = gg_define_variable(SIZE_T_P, "..baost_values_p", vs_file_static); + if( N ) + { + gg_assign( values_p, + gg_cast(build_pointer_type(SIZE_T), + gg_malloc(N*sizeof(SIZE_T)))); + + for(size_t i=0; itype == FldLiteralA); + char *buffer = NULL; + size_t buffer_length = 0; + if( buffer_length < field->data.capacity+1 ) + { + buffer_length = field->data.capacity+1; + buffer = (char *)xrealloc(buffer, buffer_length); + } + for(size_t i=0; idata.capacity; i++) + { + buffer[i] = ascii_to_internal(field->data.initial[i]); + } + buffer[field->data.capacity] = '\0'; + return buffer; + } + +bool +refer_is_clean(cbl_refer_t &refer) + { + if( !refer.field ) + { + // It is routine for a refer to have no field. It happens when the parser + // passes us a refer for an optional parameter that has been ommitted, for + // example. + return true; + } + + return !refer.all + && !refer.addr_of + && !refer.nsubscript + && !refer.refmod.from + && !refer.refmod.len + && !refer_has_depends(refer, refer_source) + ; + } + +void +REFER_CHECK(const char *func, + int line, + cbl_refer_t &refer + ) + { + static int counter=1; + + if( counter == 5 ) + { + fprintf(stderr, "DING! %d\n", counter); + } + + + fprintf(stderr, + "ct REFER_CHECK(%d): %s():%d %s\n", + counter, + func, + line, + refer.field->name); + + gg_printf("rt REFER_CHECK(%d): %s():%d %s (%s)\n", + build_int_cst_type(INT, counter), + gg_string_literal(func), + build_int_cst_type(INT, line), + gg_string_literal(refer.field->name), + gg_string_literal(cbl_field_type_str(refer.field->type)), + NULL_TREE); + counter+=1; + } + +static +tree // size_t +refer_refmod_length(cbl_refer_t &refer) + { + Analyze(); + if( refer.refmod.from || refer.refmod.len ) + { + // First, check for compile-time errors + bool any_length = !!(refer.field->attr & any_length_e); + tree rt_capacity; + static tree value64 = gg_define_variable(LONG, "..rrl_value64", vs_file_static); + static tree refstart = gg_define_variable(LONG, "..rrl_refstart", vs_file_static); + static tree reflen = gg_define_variable(LONG, "..rrl_reflen", vs_file_static); + + if( any_length ) + { + rt_capacity = + gg_cast(LONG, + member(refer.field->var_decl_node, "capacity")); + } + else + { + rt_capacity = + build_int_cst_type(LONG, refer.field->data.capacity); + } + + gg_assign(reflen, gg_cast(TREE_TYPE(reflen), integer_one_node)); + + if( process_this_exception(ec_bound_ref_mod_e) ) + { + get_integer_value(value64, + refer.refmod.from->field, + refer_offset_source(*refer.refmod.from), + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, + ne_op, + integer_zero_node ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_one_node)); + } + else + { + rt_error("a refmod FROM value is not an integer"); + } + } + ELSE + gg_assign(refstart, value64); + ENDIF + } + else + { + get_integer_value(value64, + refer.refmod.from->field, + refer_offset_source(*refer.refmod.from) + ); + gg_assign(refstart, value64); + } + + // Make refstart zero-based: + gg_decrement(refstart); + + if( process_this_exception(ec_bound_ref_mod_e) ) + { + IF( refstart, lt_op, build_int_cst_type(LONG, 0 ) ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + } + else + { + rt_error("a refmod FROM value is less than zero"); + } + } + ELSE + { + IF( refstart, gt_op, rt_capacity ) + { + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + } + else + { + rt_error("a refmod FROM value is too large"); + } + } + ELSE + { + if( refer.refmod.len ) + { + get_integer_value(value64, + refer.refmod.len->field, + refer_offset_source(*refer.refmod.len), + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, + ne_op, + integer_zero_node ) + { + // length is not an integer + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(reflen, gg_cast(LONG, integer_one_node)); + } + else + { + rt_error("a refmod LENGTH is not an integer"); + } + } + ELSE + { + gg_assign(reflen, gg_cast(LONG, value64)); + } + ENDIF + + IF( reflen, lt_op, gg_cast(LONG, integer_one_node) ) + { + // length is too small + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + gg_assign(reflen, gg_cast(LONG, integer_one_node)); + } + else + { + rt_error("a refmod LENGTH is less than one"); + } + } + ELSE + { + IF( gg_add(refstart, reflen), + gt_op, + rt_capacity ) + { + // Start + Length is too large + if( enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + SET_EXCEPTION_CODE(ec_bound_ref_mod_e); + + // Our intentions are honorable. But at this point, where + // we notice that start + length is too long, the + // get_data_offset_source routine has already been run and + // it's too late to actually change the refstart. There are + // theoretical solutions to this -- mainly, + // get_data_offset_source needs to check the start + len for + // validity. But I am not going to do it now. Think of this + // as the TODO item. + gg_assign(refstart, gg_cast(LONG, integer_zero_node)); + gg_assign(reflen, gg_cast(LONG, integer_one_node)); + } + else + { + rt_error("refmod START + LENGTH is too large"); + } + } + ELSE + ENDIF + } + ENDIF + } + else + { + // There is no refmod length, so we default to the remaining characters + tree subtract_expr = gg_subtract( rt_capacity, + refstart); + gg_assign(reflen, subtract_expr); + } + } + ENDIF + } + ENDIF + } + else + { + if( refer.refmod.len ) + { + get_integer_value(value64, + refer.refmod.len->field, + refer_offset_source(*refer.refmod.len) + ); + gg_assign(reflen, gg_cast(LONG, value64)); + } + else + { + // There is no refmod length, so we default to the remaining characters + gg_assign(reflen, gg_subtract(rt_capacity, + refstart)); + } + } + + // Arrive here with valid values for refstart and reflen: + + return gg_cast(SIZE_T, reflen); + } + else + { + return size_t_zero_node; + } + } + +static +tree // size_t +refer_fill_depends(cbl_refer_t &refer) + { + // This returns a positive number which is the amount a depends-limited + // capacity needs to be reduced. + Analyze(); + cbl_field_t *odo = symbol_find_odo(refer.field); + cbl_field_t *depending_on; + depending_on = cbl_field_of(symbol_at(odo->occurs.depending_on)); + // refer.field has a relevant DEPENDING ON clause + + // gg_printf("var is %s type is %s\n", + // gg_string_literal(refer.field->name), + // gg_string_literal(cbl_field_type_str(refer.field->type)), + // NULL_TREE); + // gg_printf(" odo is %s\n", gg_string_literal(odo->name), NULL_TREE); + + // gg_printf(" depending_on is %s\n", gg_string_literal(depending_on->name), NULL_TREE); + // fprintf(stderr, + // "symbol_find_odo found %s, with depending_on %s\n", + // odo->name, + // depending_on->name); + + static tree value64 = gg_define_variable(LONG, "..rfd_value64", vs_file_static); + if( process_this_exception(ec_bound_odo_e) ) + { + get_integer_value(value64, + depending_on, + NULL, + CHECK_FOR_FRACTIONAL_DIGITS); + IF( var_decl_rdigits, ne_op, integer_zero_node ) + { + // This needs to evaluate to an integer + if( enabled_exceptions.match(ec_bound_odo_e) ) + { + SET_EXCEPTION_CODE(ec_bound_odo_e); + gg_assign(value64, build_int_cst_type(TREE_TYPE(value64), odo->occurs.bounds.upper)); + } + else + { + rt_error("DEPENDING ON is not an integer"); + } + } + ELSE + ENDIF + } + else + { + get_integer_value(value64, depending_on); + } + + if( process_this_exception(ec_bound_odo_e) ) + { + IF( value64, gt_op, build_int_cst_type(TREE_TYPE(value64), odo->occurs.bounds.upper) ) + { + SET_EXCEPTION_CODE(ec_bound_odo_e); + gg_assign(value64, build_int_cst_type(TREE_TYPE(value64), odo->occurs.bounds.upper)); + } + ELSE + { + IF( value64, lt_op, build_int_cst_type(TREE_TYPE(value64), odo->occurs.bounds.lower) ) + { + if( enabled_exceptions.match(ec_bound_odo_e) ) + { + SET_EXCEPTION_CODE(ec_bound_odo_e); + gg_assign(value64, build_int_cst_type(TREE_TYPE(value64), odo->occurs.bounds.lower)); + } + else + { + rt_error("DEPENDING ON is less than OCCURS lower limit"); + } + } + ELSE + ENDIF + IF( value64, lt_op, gg_cast(TREE_TYPE(value64), integer_zero_node) ) + { + if( enabled_exceptions.match(ec_bound_odo_e) ) + { + SET_EXCEPTION_CODE(ec_bound_odo_e); + gg_assign(value64, gg_cast(TREE_TYPE(value64), integer_zero_node)); + } + else + { + rt_error("DEPENDING ON is greater than OCCURS upper limit"); + } + } + ELSE + ENDIF + } + ENDIF + } + // value64 is >= zero and < bounds.upper + + // We multiply the ODO value by the size of the data capacity to get the + // shortened length: + + tree mult_expr = gg_multiply( build_int_cst_type(TREE_TYPE(value64), odo->data.capacity), + value64 ); + + // And we add that to the distance from the requested variable to the odo + // variable to get the modified length: + tree add_expr = gg_add(mult_expr, build_int_cst_type(SIZE_T, odo->offset - refer.field->offset)); + return add_expr; + } + +tree // size_t +refer_offset_dest(cbl_refer_t &refer) + { + Analyze(); + // This has to be on the stack, because there are places where this routine + // is called twice before the results are used. + + if( !refer.field ) + { + return size_t_zero_node; + } + + if( !refer.nsubscript ) + { + return get_data_offset_dest(refer); + } + + gg_assign(var_decl_odo_violation, integer_zero_node); + + tree retval = gg_define_variable(SIZE_T); + gg_assign(retval, get_data_offset_dest(refer)); + if( process_this_exception(ec_bound_odo_e) ) + { + IF( var_decl_odo_violation, ne_op, integer_zero_node ) + { + if( enabled_exceptions.match(ec_bound_odo_e) ) + { + SET_EXCEPTION_CODE(ec_bound_odo_e); + } + else + { + rt_error("receiving item subscript not in DEPENDING ON range"); + } + } + ELSE + ENDIF + } + return retval; + } + +tree // size_t +refer_size_dest(cbl_refer_t &refer) + { + Analyze(); + //static tree retval = gg_define_variable(SIZE_T, "..rsd_retval", vs_file_static); + tree retval = gg_define_variable(SIZE_T); + + if( !refer.field ) + { + return size_t_zero_node; + } + if( refer_is_clean(refer) ) + { + // When the refer has no modifications, we return zero, which is interpreted + // as "use the original length" + if( refer.field->attr & (intermediate_e | any_length_e) ) + { + return member(refer.field->var_decl_node, "capacity"); + } + else + { + return build_int_cst_type(SIZE_T, refer.field->data.capacity); + } + } + + // Step the first: Get the actual full length: + if( refer.field->attr & (intermediate_e | any_length_e) ) + { + // This is an intermediate; use the length that might have changed + // because of a FUNCTION TRIM, or whatnot. + + // We also pick up capacity for variables that were specified in + // linkage as ANY LENGTH + gg_assign(retval, member(refer.field->var_decl_node, "capacity")); + } + + if( refer_has_depends(refer, refer_dest) ) + { + // Because there is a depends, we might have to change the length: + gg_assign(retval, refer_fill_depends(refer)); + } + else + { + // Use the compile-time value + gg_assign(retval, build_int_cst_type(SIZE_T, refer.field->data.capacity)); + } + + if( refer.refmod.from || refer.refmod.len ) + { + tree refmod = refer_refmod_length(refer); + // retval is the ODO based total length. + // refmod is the length resulting from refmod(from:len) + // We have to reduce retval by the effect of refmod: + tree diff = gg_subtract(build_int_cst_type(SIZE_T, refer.field->data.capacity), + refmod); + gg_assign(retval, gg_subtract(retval, diff)); + } + return retval; + } + +tree // size_t +refer_offset_source(cbl_refer_t &refer, + int *pflags) + { + if( !refer.field ) + { + return size_t_zero_node; + } + if( !refer.nsubscript ) + { + return get_data_offset_source(refer); + } + + Analyze(); + + tree retval = gg_define_variable(SIZE_T); + gg_assign(var_decl_odo_violation, integer_zero_node); + + gg_assign(retval, get_data_offset_source(refer, pflags)); + if( process_this_exception(ec_bound_odo_e) ) + { + IF( var_decl_odo_violation, ne_op, integer_zero_node ) + { + if( enabled_exceptions.match(ec_bound_odo_e) ) + { + SET_EXCEPTION_CODE(ec_bound_odo_e); + } + else + { + rt_error("sending item subscript not in DEPENDING ON range"); + } + } + ELSE + ENDIF + } + return retval; + } + +tree // size_t +refer_size_source(cbl_refer_t &refer) + { + if( !refer.field ) + { + return size_t_zero_node; + } + if( refer_is_clean(refer) ) + { + // When the refer has no modifications, we return zero, which is interpreted + // as "use the original length" + if( refer.field->attr & (intermediate_e | any_length_e) ) + { + return member(refer.field->var_decl_node, "capacity"); + } + else + { + return build_int_cst_type(SIZE_T, refer.field->data.capacity); + } + } + + Analyze(); + + // Step the first: Get the actual full length: + static tree retval = gg_define_variable(SIZE_T, "..rss_retval", vs_file_static); + if( refer.field->attr & (intermediate_e | any_length_e) ) + { + // This is an intermediate; use the length that might have changed + // because of a FUNCTION TRIM, or whatnot. + + // We also pick up capacity for variables that were specified in + // linkage as ANY LENGTH + gg_assign(retval, + member(refer.field->var_decl_node, "capacity")); + } + + if( refer_has_depends(refer, refer_source) ) + { + // Because there is a depends, we might have to change the length: + gg_assign(retval, refer_fill_depends(refer)); + } + else + { + // Use the compile-time value + gg_assign(retval, build_int_cst_type(SIZE_T, refer.field->data.capacity)); + } + + if( refer.refmod.from || refer.refmod.len ) + { + tree refmod = refer_refmod_length(refer); + // retval is the ODO based total length. + // refmod is the length resulting from refmod(from:len) + // We have to reduce retval by the effect of refmod: + tree diff = gg_subtract(build_int_cst_type(SIZE_T, refer.field->data.capacity), + refmod); + gg_assign(retval, gg_subtract(retval, diff)); + } + return retval; + } + +tree +qualified_data_source(cbl_refer_t &refer) + { + return gg_add(member(refer.field->var_decl_node, "data"), + refer_offset_source(refer)); + } + +tree +qualified_data_dest(cbl_refer_t &refer) + { + return gg_add(member(refer.field->var_decl_node, "data"), + refer_offset_dest(refer)); + } diff --git a/gcc/cobol/genutil.h b/gcc/cobol/genutil.h new file mode 100644 index 00000000000..e2523778032 --- /dev/null +++ b/gcc/cobol/genutil.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _GENUTIL_H_ +#define _GENUTIL_H_ + +#define EBCDIC_MINUS (0x60) +#define EBCDIC_PLUS (0x4E) +#define EBCDIC_ZERO (0xF0) +#define EBCDIC_NINE (0xF9) + +bool internal_codeset_is_ebcdic(); + +extern bool exception_location_active; +extern bool skip_exception_processing; + +extern bool suppress_dest_depends; + +extern std::vectorcurrent_filename; + +extern tree var_decl_exception_code; // int __gg__exception_code; +extern tree var_decl_exception_handled; // int __gg__exception_handled; +extern tree var_decl_exception_file_number; // int __gg__exception_file_number; +extern tree var_decl_exception_file_status; // int __gg__exception_file_status; +extern tree var_decl_exception_file_name; // const char *__gg__exception_file_name; +extern tree var_decl_exception_statement; // const char *__gg__exception_statement; +extern tree var_decl_exception_source_file; // const char *__gg__exception_source_file; +extern tree var_decl_exception_line_number; // int __gg__exception_line_number; +extern tree var_decl_exception_program_id; // const char *__gg__exception_program_id; +extern tree var_decl_exception_section; // const char *__gg__exception_section; +extern tree var_decl_exception_paragraph; // const char *__gg__exception_paragraph; + +extern tree var_decl_default_compute_error; // int __gg__default_compute_error; +extern tree var_decl_rdigits; // int __gg__rdigits; +extern tree var_decl_odo_violation; // int __gg__odo_violation; +extern tree var_decl_unique_prog_id; // size_t __gg__unique_prog_id; + +extern tree var_decl_entry_location; // This is for managing ENTRY statements +extern tree var_decl_exit_address; // This is for implementing pseudo_return_pop + +extern tree var_decl_call_parameter_signature; // char *__gg__call_parameter_signature +extern tree var_decl_call_parameter_count; // int __gg__call_parameter_count +extern tree var_decl_call_parameter_lengths; // size_t *var_decl_call_parameter_lengths + +extern tree var_decl_return_code; // short __gg__data_return_code + +extern tree var_decl_arithmetic_rounds_size; // size_t __gg__arithmetic_rounds_size; +extern tree var_decl_arithmetic_rounds; // int* __gg__arithmetic_rounds; +extern tree var_decl_fourplet_flags_size; // size_t __gg__fourplet_flags_size; +extern tree var_decl_fourplet_flags; // int* __gg__fourplet_flags; + +extern tree var_decl_treeplet_1f; // cblc_field_pp_type_node , "__gg__treeplet_1f" +extern tree var_decl_treeplet_1o; // SIZE_T_P , "__gg__treeplet_1o" +extern tree var_decl_treeplet_1s; // SIZE_T_P , "__gg__treeplet_1s" +extern tree var_decl_treeplet_2f; // cblc_field_pp_type_node , "__gg__treeplet_2f" +extern tree var_decl_treeplet_2o; // SIZE_T_P , "__gg__treeplet_2o" +extern tree var_decl_treeplet_2s; // SIZE_T_P , "__gg__treeplet_2s" +extern tree var_decl_treeplet_3f; // cblc_field_pp_type_node , "__gg__treeplet_3f" +extern tree var_decl_treeplet_3o; // SIZE_T_P , "__gg__treeplet_3o" +extern tree var_decl_treeplet_3s; // SIZE_T_P , "__gg__treeplet_3s" +extern tree var_decl_treeplet_4f; // cblc_field_pp_type_node , "__gg__treeplet_4f" +extern tree var_decl_treeplet_4o; // SIZE_T_P , "__gg__treeplet_4o" +extern tree var_decl_treeplet_4s; // SIZE_T_P , "__gg__treeplet_4s" + +extern tree var_decl_nop; // int __gg__nop +extern tree var_decl_main_called; // int __gg__main_called + +int get_scaled_rdigits(cbl_field_t *field); +int get_scaled_digits(cbl_field_t *field); +tree tree_type_from_digits(size_t digits, int signable); +tree tree_type_from_size(size_t bytes, int signable); +tree tree_type_from_field(cbl_field_t *field); +void get_binary_value( tree value, + tree rdigits, + cbl_field_t *field, + tree field_offset, + tree hilo = NULL); +tree get_data_address( cbl_field_t *field, + tree offset); +__int128 get_power_of_ten(int n); +void scale_by_power_of_ten_N(tree value, + int N, + bool check_for_fractional = false); +tree scale_by_power_of_ten(tree value, + tree N, + bool check_for_fractional = false); +void scale_and_round(tree value, + int value_rdigits, + bool target_is_signable, + int target_rdigits, + cbl_round_t rounded); +void hex_dump(tree data, size_t bytes); +void set_exception_code_func(ec_type_t ec, + int line, + int from_raise_statement=0); +#define set_exception_code(ec) set_exception_code_func(ec, __LINE__) +bool process_this_exception(ec_type_t ec); +#define CHECK_FOR_FRACTIONAL_DIGITS true +void get_integer_value(tree value, + cbl_field_t *field, + tree offset=NULL, // size_t + bool check_for_fractional_digits=false); +void rt_error(const char *msg); +void copy_little_endian_into_place(cbl_field_t *dest, + tree dest_offset, + tree value, + int rhs_rdigits, + bool check_for_error, + tree &size_error); +tree build_array_of_size_t( size_t N, + const size_t *values); +void parser_display_internal_field(tree file_descriptor, + cbl_field_t *field, + bool advance=DISPLAY_NO_ADVANCE); +char *get_literal_string(cbl_field_t *field); + +bool refer_is_clean(cbl_refer_t &refer); + +tree refer_offset_source(cbl_refer_t &refer, + int *pflags=NULL); +tree refer_size_source(cbl_refer_t &refer); +tree refer_offset_dest(cbl_refer_t &refer); +tree refer_size_dest(cbl_refer_t &refer); + +void REFER_CHECK( const char *func, + int line, + cbl_refer_t &refer + ); +#define refer_check(a) REFER_CHECK(__func__, __LINE__, a) + +tree qualified_data_source(cbl_refer_t &refer); + +tree qualified_data_dest(cbl_refer_t &refer); + +void build_array_of_treeplets( int ngroup, + size_t N, + cbl_refer_t *refers); + +void build_array_of_fourplets( int ngroup, + size_t N, + cbl_refer_t *refers); +#endif diff --git a/gcc/cobol/help.gen b/gcc/cobol/help.gen new file mode 100755 index 00000000000..6aa201f0ac7 --- /dev/null +++ b/gcc/cobol/help.gen @@ -0,0 +1,15 @@ +#! /usr/bin/awk -f + +BEGIN { + print "puts(" +} + +/^ {5}[-][[:alnum:]-]+/, /[.] / { + gsub(/[.] .+/, ". ") + gsub(/^ /, ""); + print "\t\"" $0 "\\n\"" +} + +END { + print ");" +} diff --git a/gcc/cobol/inspect.h b/gcc/cobol/inspect.h new file mode 100644 index 00000000000..9e86a0bd1a7 --- /dev/null +++ b/gcc/cobol/inspect.h @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef INSPECT_H +#define INSPECT_H +#include +#include +#include +#include + +/* + * INSPECT has 3 repeating elements: + * + * 1. cbl_inspect_t + * Tally (identifier-2). parser_inspect takes N of these. + * Because REPLACING has no such loop, N == 1 for REPLACING. + * + * 2. cbl_inspect_oper_t + * The CHARACTERS/ALL/LEADING/FIRST phrase (type of match) + * Has N match/replace operands (or both) + * + * 3. cbl_inspect_match_t and cbl_inspect_replace_t + * The CHARACTERS/ALL/LEADING/FIRST operands + * Has N tuples of identifier-3 + [BEFORE and/or AFTER] + */ + +static inline bool +is_active( const cbl_refer_t& refer ) { return NULL != refer.field; } + +template +struct cbx_inspect_qual_t { + bool initial; + DATA identifier_4; + + cbx_inspect_qual_t() : initial(false), identifier_4(DATA()) {} + cbx_inspect_qual_t( bool initial, const DATA& identifier_4 ) + : initial(initial), identifier_4(identifier_4) + { + //if( identifier_4.field ) yywarn("%s:qualifying field is '%s'", __func__, identifier_4.field->name); + } + cbx_inspect_qual_t( const cbx_inspect_qual_t& that ) + : initial(that.initial) + , identifier_4(that.identifier_4) + { + //if( identifier_4.field ) yywarn("%s:qualifying field is '%s'", __func__, identifier_4.field->name); + } + + cbx_inspect_qual_t& operator=( const cbx_inspect_qual_t& that ) { + initial = that.initial; + identifier_4 = that.identifier_4; + //if( identifier_4.field ) yywarn("%s:qualifying field is '%s'", __func__, identifier_4.field->name); + return *this; + } + + bool active() const { return is_active(identifier_4); } + + void clear() { + initial = false; + identifier_4.clear(); + } +}; + +typedef cbx_inspect_qual_t cbl_inspect_qual_t; + +/* + * Data for INSPECT X TALLYING Y FOR. Captures information for operands of + * CHARACTERS/ALL/LEADING. The CHARACTERS/ALL/LEADING control is kept at the + * next higher level, and may be repeated for each tally. + * + * cbx_inspect_match_t::matching is not used with CHARACTERS + */ +template +struct cbx_inspect_match_t { + DATA matching; // identifier-3/5 or literal-1/3 + cbx_inspect_qual_t before, after; // phrase 1 + + cbx_inspect_match_t( + const DATA& matching = DATA(), + cbx_inspect_qual_t before = cbx_inspect_qual_t(), + cbx_inspect_qual_t after = cbx_inspect_qual_t() + ) + : matching(matching) + , before(before) + , after(after) + {} + // match all characters + bool match_any() const { return !(before.active() || after.active()); } +}; + +typedef cbx_inspect_match_t cbl_inspect_match_t; + +/* + * Data for INSPECT X REPLACING. The CHARACTERS/ALL/LEADING/FIRST control is + * kept at the next higher level, and may be repeated. + */ +template +struct cbx_inspect_replace_t : public cbx_inspect_match_t { + DATA replacement; + + cbx_inspect_replace_t( const DATA& matching = DATA(), + const DATA& replacement = DATA() ) + : cbx_inspect_match_t(matching) + , replacement(replacement) + {} + cbx_inspect_replace_t( const DATA& matching, + const DATA& replacement, + const cbx_inspect_qual_t& before, + const cbx_inspect_qual_t& after ) + : cbx_inspect_match_t(matching, before, after) + , replacement(replacement) + {} +}; + +typedef cbx_inspect_replace_t cbl_inspect_replace_t; + +// One partial tally or substitution. +template +struct cbx_inspect_oper_t { + cbl_inspect_bound_t bound; // CHARACTERS/ALL/LEADING/FIRST + size_t n_identifier_3; // N matches/replaces + // either tallies or replaces is NULL + cbx_inspect_match_t *matches; + cbx_inspect_replace_t *replaces; + + cbx_inspect_oper_t( cbl_inspect_bound_t bound, + std::list> matches ) + : bound(bound) + , n_identifier_3( matches.size()) + , matches(NULL) + , replaces(NULL) + { + this->matches = new cbx_inspect_match_t[n_identifier_3]; + std::copy( matches.begin(), matches.end(), this->matches ); + } + + cbx_inspect_oper_t( cbl_inspect_bound_t bound, + std::list> replaces ) + : bound(bound) + , n_identifier_3( replaces.size() ) + , matches(NULL) + , replaces(NULL) + { + this->replaces = new cbx_inspect_replace_t[n_identifier_3]; + std::copy( replaces.begin(), replaces.end(), this->replaces ); + } + + cbx_inspect_oper_t() + : bound(bound_characters_e) + , n_identifier_3(0) + , matches(NULL) + , replaces(NULL) + { + assert( is_valid() ); + } + + bool is_valid() const { + if( matches && replaces ) return false; + if( matches || replaces ) return n_identifier_3 > 0; + return n_identifier_3 == 0; + } +}; + +typedef cbx_inspect_oper_t cbl_inspect_oper_t; + +// One whole tally or substitution. For REPLACING, nbound == 1 +template +struct cbx_inspect_t { + DATA tally; // identifier-2: NULL without a tally + size_t nbound; // Each FOR or REPLACING operation starts with a cbl_inspect_bound_t + cbx_inspect_oper_t *opers; + + cbx_inspect_t( const DATA& tally = DATA() ) + : tally(tally) + , nbound(0) + , opers(NULL) + {} + cbx_inspect_t( const DATA& tally, cbx_inspect_oper_t oper ) + : tally(tally) + , nbound(1) + , opers(NULL) + { + this->opers = new cbx_inspect_oper_t[1]; + this->opers[0] = oper; + } + cbx_inspect_t( const DATA& tally, + const std::list>& opers ) + : tally(tally) + , nbound( opers.size() ) + , opers(NULL) + { + this->opers = new cbx_inspect_oper_t[nbound]; + std::copy( opers.begin(), opers.end(), this->opers ); + } +}; + +typedef cbx_inspect_t cbl_inspect_t; + + +/* + * Runtime + */ + +void parser_inspect( cbl_refer_t input, bool backward, + size_t ninspect, cbl_inspect_t *inspects ); +void parser_inspect_conv( cbl_refer_t input, bool backward, + cbl_refer_t original, + cbl_refer_t replacement, + cbl_inspect_qual_t before = cbl_inspect_qual_t(), + cbl_inspect_qual_t after = cbl_inspect_qual_t() ); + +#endif // INSPECT_H diff --git a/gcc/cobol/lang-specs.h b/gcc/cobol/lang-specs.h new file mode 100644 index 00000000000..78e84c03464 --- /dev/null +++ b/gcc/cobol/lang-specs.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* gcc-src/gcc/config/lang-specs.h */ + {".cob", "@cobol", 0, 0, 0}, + {".COB", "@cobol", 0, 0, 0}, + {".cbl", "@cobol", 0, 0, 0}, + {".CBL", "@cobol", 0, 0, 0}, + {"@cobol", + "cobol1 %i %(cc1_options) " + "%{D*} %{E} %{I*} %{fmax-errors*} %{fsyntax-only*} " + "%{fcobol-exceptions*} " + "%{copyext} " + "%{fstatic-call} %{fdefaultbyte} " + "%{ffixed-form} %{ffree-form} %{indicator-column*} " + "%{preprocess} " + "%{dialect} " + "%{include} " + "%{nomain} " + "%{!fsyntax-only:%(invoke_as)} " + , 0, 0, 0}, diff --git a/gcc/cobol/lang.opt b/gcc/cobol/lang.opt new file mode 100644 index 00000000000..42c402037b5 --- /dev/null +++ b/gcc/cobol/lang.opt @@ -0,0 +1,144 @@ +; lang.opt -- Options for the gcc Cobol front end. + +; Copyright (C) 2021-2025 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; See the GCC internals manual for a description of this file's format. + +; Please try to keep this file in ASCII collating order. + +Language +Cobol + +D +Cobol Joined Separate +; Documented in c.opt + +E +Cobol +; Documented in c.opt + +I +Cobol Joined Separate +;; -I Add copybook search directory +; Documented in c.opt + +dialect +Cobol Joined Separate Enum(dialect_type) EnumBitSet Var(cobol_dialect) +Accept COBOL constructs used by non-ISO compilers + +Enum +Name(dialect_type) Type(int) UnknownError(Unrecognized COBOL dialect name: %qs) + +EnumValue +Enum(dialect_type) String(gcc) Value(0x04) Canonical + +EnumValue +Enum(dialect_type) String(ibm) Value(0x01) + +EnumValue +Enum(dialect_type) String(mf) Value(0x02) + +EnumValue +Enum(dialect_type) String(gnu) Value(0x04) + +fcobol-exceptions +Cobol Joined Separate Var(cobol_exceptions) +-fcobol-exceptions= Enable some exceptions by default + +copyext +Cobol Joined Separate Var(cobol_copyext) Init(0) +Define alternative implicit copybook filename extension + +fdefaultbyte +Cobol RejectNegative Joined Separate UInteger Var(cobol_default_byte) +Set Working-Storage data items to the supplied value + +fflex-debug +Cobol Var(yy_flex_debug, 1) Init(0) +Enable Cobol lex debugging + +ffixed-form +Cobol RejectNegative +Assume that the source file is fixed form. + +fsyntax-only +Cobol RejectNegative +; Documented in c.opt + +ffree-form +Cobol RejectNegative +Assume that the source file is free form. + +findicator-column +Cobol RejectNegative Joined Separate UInteger Var(indicator_column) Init(0) IntegerRange(0, 8) +-findicator-column= Column after which Region A begins + +finternal-ebcdic +Cobol Var(cobol_ebcdic, 1) Init(0) +-finternal-ebcdic Internal processing is in EBCDIC Code Page 1140 + +fmax-errors +Cobol Joined Separate +; Documented in C + +fstatic-call +Cobol Var(cobol_static_call, 1) Init(1) +Enable/disable static linkage for CALL literals + +ftrace-debug +Cobol Var(cobol_trace_debug, 1) Init(0) +Enable Cobol parser debugging + +fyacc-debug +Cobol Var(yy_debug, 1) Init(0) +Enable Cobol yacc debugging + +preprocess +Cobol Joined Separate Var(cobol_preprocess) +preprocess before compiling + +iprefix +Cobol Joined Separate +; Documented in C + +include +Cobol Joined Separate Var(cobol_include) +; Documented in C + +isysroot +Cobol Joined Separate +; Documented in C + +isystem +Cobol Joined Separate +; Documented in C + +main +Cobol +-main The first program-id in the next source file is called by a generated main() entry point + +main= +Cobol Joined Var(cobol_main_string) +-main= source_file/PROGRAM-ID is called by the generated main() + +nomain +Cobol +-nomain No main() function is created from COBOL source files + +; This comment is to ensure we retain the blank line above. diff --git a/gcc/cobol/lang.opt.urls b/gcc/cobol/lang.opt.urls new file mode 100644 index 00000000000..a0e1f1944fe --- /dev/null +++ b/gcc/cobol/lang.opt.urls @@ -0,0 +1,29 @@ +; Autogenerated by regenerate-opt-urls.py from gcc/cobol/lang.opt and generated HTML + +D +UrlSuffix(gcc/Preprocessor-Options.html#index-D-1) + +; skipping UrlSuffix for 'E' due to multiple URLs: +; duplicate: 'gcc/Link-Options.html#index-E-1' +; duplicate: 'gcc/Overall-Options.html#index-E' + +I +UrlSuffix(gcc/Directory-Options.html#index-I) LangUrlSuffix_D(gdc/Directory-Options.html#index-I) + +fsyntax-only +UrlSuffix(gcc/Warning-Options.html#index-fsyntax-only) LangUrlSuffix_D(gdc/Warnings.html#index-fno-syntax-only) + +fmax-errors +UrlSuffix(gcc/Warning-Options.html#index-fmax-errors) LangUrlSuffix_D(gdc/Warnings.html#index-fmax-errors) + +iprefix +UrlSuffix(gcc/Directory-Options.html#index-iprefix) LangUrlSuffix_D(gdc/Directory-Options.html#index-iprefix) + +include +UrlSuffix(gcc/Preprocessor-Options.html#index-include) + +isysroot +UrlSuffix(gcc/Directory-Options.html#index-isysroot) + +isystem +UrlSuffix(gcc/Directory-Options.html#index-isystem) diff --git a/gcc/cobol/lexio.cc b/gcc/cobol/lexio.cc new file mode 100644 index 00000000000..40ba873b7fb --- /dev/null +++ b/gcc/cobol/lexio.cc @@ -0,0 +1,1878 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "cobol-system.h" +#include "cbldiag.h" +#include "util.h" +#include "copybook.h" +#include "lexio.h" + +extern int yy_flex_debug; + +static struct { + bool first_file, explicitly; + int column, right_margin; + bool inference_pending() { + bool tf = first_file && !explicitly; + first_file = false; + return tf; + } +} indicator = { true, false, 0, 0 }; + +static bool debug_mode = false; + +/* + * The "debug mode" is a little odd, because we have to make sure a + * leading "D" doesn't start the verb DISPLAY (for example). If + * debug_mode is on, debug lines are included in the parse. If + * debug_mode is off but we're not in fixed_format, lines starting + * with "D" are also included. + * + * So, the line is excluded if: fixed format and not debug mode + * Else, it's included. +*/ + +static inline int left_margin() { + return indicator.column == 0? indicator.column : indicator.column - 1; +} +static inline int right_margin() { + return indicator.right_margin == 0? + indicator.right_margin : indicator.right_margin - 1; +} + +/* + * When setting the indicator column explicity: + * To get strict fixed 72-column lines, use a negative column number. + * When setting back to 0 (free), the right margin is also reset to 0. + */ +void +cobol_set_indicator_column( int column ) +{ + indicator.explicitly = true; + if( column == 0 ) indicator.right_margin = 0; + if( column < 0 ) { + column = -column; + indicator.right_margin = 73; + } + indicator.column = column; +} + +bool is_fixed_format() { return indicator.column == 7; } +bool is_reference_format() { + return indicator.column == 7 && indicator.right_margin == 73; +} +bool include_debug() { return indicator.column == 7 && debug_mode; } +bool set_debug( bool tf ) { return debug_mode = tf && is_fixed_format(); } + +static bool nonblank( const char ch ) { return !isblank(ch); } + +static inline char * +start_of_line( char *bol, char *eol ) { + bol = std::find_if(bol, eol, nonblank); + gcc_assert(bol < eol); // must exist + return bol; +} + +static inline char * +continues_at( char *bol, char *eol ) { + if( indicator.column == 0 ) return NULL; // cannot continue in free format + bol += left_margin(); + if( *bol != '-' ) return NULL; // not a continuation line + return start_of_line(++bol, eol); +} + +// Return pointer to indicator column. Test ch if provided. +// NULL means no indicator column or tested value not present. +static inline char * +indicated( char *bol, char *eol, char ch = '\0' ) { + if( indicator.column == 0 && *bol != '*' ) { + return NULL; // no indicator column in free format, except for comments + } + gcc_assert(bol != NULL); + auto ind = bol + left_margin(); + if( eol <= ind ) return NULL; // left margin would be after end of line + // If TAB is in the line-number region, nothing is in the indicator column. + bool has_tab = std::any_of(bol, ind, + [](const char ch) { return ch == '\t'; } ); + if( has_tab ) return NULL; + if( (bol += left_margin()) > eol ) return NULL; + return ch == '\0' || ch == *bol? bol : NULL; +} + +static char * +remove_inline_comment( char *bol, char *eol ) { + static char ends = '\0'; + char *nl = std::find(bol, eol, '\n'); + + if( bol < nl ) { + std::swap(*nl, ends); + char *comment = strstr(bol, "*>"); + if( comment ) { + std::fill(comment, nl, SPACE); + } + std::swap(*nl, ends); + } + return eol; +} + +static void +erase_line( char *src, char *esrc ) { + dbgmsg( "%s: erasing %.*s from input", __func__, int(esrc-src), src); + erase_source(src, esrc); +} + +static size_t +count_newlines( const char *beg, const char *end ) { + return std::count(beg, end, '\n'); +} + +size_t +filespan_t::tab_check( const char *src, const char *esrc ) { + static const char tab = '\t'; + + const char *data = src + left_margin(); + if( data < esrc ) { // not a blank line + const char *tab_at = std::find(src, data, tab); + if( tab_at < data ) { + return (tab_at - src) + 1; + } + } + return 0; +} + +static const auto extended_icase = regex::extended | regex::icase; + +std::stack< std::list > replace_directives; + +static bool +is_word_or_quote( char ch ) { + return ch == '"' || ch == '\'' || ISALNUM(ch); +} +/* + * If the replacement is not leading/trailing, the edges of the + * matched pattern must delimit a Cobol word. If not, add a space to + * the replacement. + */ +static void +maybe_add_space(const span_t& pattern, replace_t& recognized) { + static const char blank[] = " "; + const char *befter[2] = { "", "" }; + gcc_assert(0 < recognized.before.size()); + + // start of pattern and end of preceding text + if( pattern.p[0] == '.' && is_word_or_quote(recognized.before.p[-1]) ) { + befter[0] = blank; + } + // end of pattern, and start of succeeding text + if( pattern.pend[-1] == '.' && is_word_or_quote(recognized.before.pend[0]) ) { + befter[1] = blank; + } + + if( befter[0] == blank || befter[1] == blank ) { + char *s = xasprintf( "%s%.*s%s", + befter[0], + recognized.after.size(), recognized.after.p, + befter[1] ); + recognized.after = span_t(s, s + strlen(s)); + } +} + +/* + * Keep track of the next pending replacement for each active REPLACE + * directive. For the current line, apply patterns that begins on the + * line. (It may match input extending beyond the current eol.) + * + * As each replacement is identified, append it to the passsed list of + * pending replacements. For these elements: + * + * before is a span in mfile + * after is dynamically allocated + */ +static void +recognize_replacements( filespan_t mfile, std::list& pending_replacements ) { + static const char *top_of_stack_cache, *applies_to; + + struct future_replacement_t { + replace_t directive; + span_t found; + future_replacement_t( const replace_t& replace, span_t found ) + : directive(replace), found(found) + {} + bool operator<( const future_replacement_t& that ) const { + return found.p < that.found.p; + } + }; + + static std::list futures; + + if( replace_directives.empty() ) return; + + if( ! (top_of_stack_cache == replace_directives.top().front().before.p + && + applies_to == mfile.data) ) + { + futures.clear(); + top_of_stack_cache = replace_directives.top().front().before.p; + applies_to = mfile.data; + } + + if( futures.empty() ) { + /* + * From the current point in the file, find the next match for each + * pattern at the top of the replacement stack. + */ + for( const auto& directive : replace_directives.top() ) { + regex re(directive.before.p, extended_icase); + cmatch cm; + + span_t found(mfile.eodata, mfile.eodata); + + if( regex_search( mfile.ccur(), (const char *)mfile.eodata, cm, re) ) { + gcc_assert(cm[1].matched); + found = span_t( cm[1].first, cm[1].second ); + if( yy_flex_debug ) { + size_t n = count_newlines(mfile.data, found.p); + dbgmsg("%s:%d first '%.*s' is on line %zu (offset %zu)", __func__, __LINE__, + directive.before.size(), directive.before.p, + ++n, found.p - mfile.data); + } + } else { + dbgmsg("%s:%d not found: '%s' in \n'%.*s'", __func__, __LINE__, + directive.before.p, int(strlen(directive.before.p)), mfile.cur); + } + futures.push_back( future_replacement_t(directive, found) ); + } + } + + gcc_assert(!futures.empty()); + gcc_assert(futures.size() == replace_directives.top().size()); + + replace_t recognized; + + auto pnext = std::min_element(futures.begin(), futures.end()); + + for( const char *bol = mfile.cur; // more than one replacement may apply to a line + bol <= pnext->found.p && pnext->found.p < mfile.eol; ) { + auto& next(*pnext); + recognized = replace_t( next.found, span_t(strlen(next.directive.after.p), + next.directive.after.p) ); + maybe_add_space(next.directive.before, recognized); + pending_replacements.push_back(recognized); + bol = next.found.pend; + + if( yy_flex_debug ) { + size_t n = std::count((const char *)mfile.data, recognized.before.p, '\n'); + dbgmsg( "%s:%d: line %zu @ %zu: '%s'\n/%.*s/%.*s/", __func__, __LINE__, + ++n, next.found.p - mfile.data, + next.directive.before.p, + int(recognized.before.size()), recognized.before.p, + int(recognized.after.size()), recognized.after.p ); + } + + // Update the futures element for this pattern + cmatch cm; + + next.found = span_t(mfile.eodata, mfile.eodata); + + regex re(next.directive.before.p, extended_icase); + if( regex_search(bol, (const char *)mfile.eodata, cm, re) ) { + gcc_assert(cm[1].matched); + next.found = span_t( cm[1].first, cm[1].second ); + size_t n = std::count((const char *)mfile.data, next.found.p, '\n'); + if( false ) + dbgmsg("%s:%d next '%.*s' will be on line %zu (offset %zu)", __func__, __LINE__, + next.directive.before.size(), next.directive.before.p, + ++n, next.found.p - mfile.data); + } + pnext = std::min_element(futures.begin(), futures.end()); + } +} + +static void +check_source_format_directive( filespan_t& mfile ) { + const char *p = std::find(mfile.cur, mfile.eol, '>'); + if( ! (p < mfile.eol && p[1] == *p ) ) return; + + const char pattern[] = + ">>[[:blank:]]*source[[:blank:]]+" + "(format[[:blank:]]+)?" + "(is[[:blank:]]+)?" + "(fixed|free)"; + static regex re(pattern, extended_icase); + + // show contents of marked subexpressions within each match + cmatch cm; + if( regex_search(p, (const char *)mfile.eol, cm, re) ) { + gcc_assert(cm.size() > 1); + switch( cm[3].length() ) { + case 4: + cobol_set_indicator_column(0); + break; + case 5: + cobol_set_indicator_column(-7); + break; + default: + gcc_assert(cm[3].length() == 4 || cm[3].length() == 5); + break; + } + mfile.cur = const_cast(cm[0].second); + dbgmsg( "%s:%d: %s format set, on line %zu", __func__, __LINE__, + indicator.column == 7? "FIXED" : "FREE", mfile.lineno() ); + erase_line(const_cast(cm[0].first), + const_cast(cm[0].second)); + } +} + +struct buffer_t : public bytespan_t { + char *pos; // current output position + + buffer_t( char *data, char *eodata ) + : bytespan_t(data, eodata) + , pos(data) + { + if(pos) *pos = '\0'; + } + + size_t nline() const { + gcc_assert(data <= pos); + return std::count(data, pos, '\n'); + } + size_t free_space() const { gcc_assert(pos <= eodata); return eodata - pos; } + + bool pad_lines( size_t goal ) { + while( nline() < goal ) { + if( pos == eodata ) return false; + *pos++ = '\n'; + } + return true; + } + + void show() const { + gcc_assert(data <= pos); + dbgmsg("flex input buffer: '%.*s'\n[xelf]", int(pos - data), data); + } + void dump() const { + if( getenv("lexer_input") ) show(); + } +}; + +static bool +valid_sequence_area( const char *p, const char *eodata ) { + const char *pend = p + 6; + if ( eodata < pend ) return false; + + for( ; p < pend; p++ ) { + if( ! (ISDIGIT(*p) || *p == SPACE) ) { + return false; + } + } + return true; // characters either digits or blanks +} + +const char * esc( size_t len, const char input[] ); + +static bool +is_word_char( char ch ) { + switch(ch) { + case '$': + case '-': + case '_': + return true; + } + return ISALNUM(ch); +} + +static bool +is_numeric_char( char ch ) { + return ISDIGIT(ch) + || TOUPPER(ch) == 'E' + || ch == '.' + || ch == ',' + ; +} + +static bool +is_numeric_term( span_t term ) { + gcc_assert(term.p); + if( term.p[0] == '+' || term.p[0] == '-' ) term.p++; + auto p = std::find_if( term.p, term.pend, + []( char ch ) { + return ! is_numeric_char(ch); + } ); + return p == term.pend; +} + +struct replacing_term_t { + bool matched, done; + span_t leading_trailing, term, stmt; + + replacing_term_t(const char input[]) : matched(false), done(false) { + stmt = span_t(input, input); + } +}; + +extern YYLTYPE yylloc; + +static const char * +last_newline (const char *p, const char *pend ) { + size_t len = pend - p; + return static_cast( memrchr( p, '\n', len ) ); +} +/* + * For some statement parsed with regex_search, set yyloc to indicate the line + * and column spans of the term. Assume stmt begins at the start of a line. + */ +static void +update_yylloc( const csub_match& stmt, const csub_match& term ) { + gcc_assert(stmt.first <= term.first && term.second <= stmt.second); + + class dump_loc_on_exit { + public: + dump_loc_on_exit() { + if( getenv( "update_yylloc" ) ) + location_dump( "update_yylloc", __LINE__, "begin", yylloc); + } + ~dump_loc_on_exit() { + if( getenv( "update_yylloc" ) ) + location_dump( "update_yylloc", __LINE__, "end ", yylloc); + } + } dloe; + + size_t nline = std::count( stmt.first, term.second, '\n' ); + size_t n = std::count( term.first, term.second, '\n' ); + + if( nline ) { + yylloc.last_line += nline; + yylloc.first_line = yylloc.last_line - n; + } + + /* + * Set the column span for the term. + */ + const char *p = last_newline(stmt.first, stmt.second); + if( !p ) { // no newlines in entire statement + yylloc.first_column = (term.first - stmt.first) + 1; + yylloc.last_column = (term.second - stmt.first) + 1; + return; + } + + p = last_newline(stmt.first, term.first); + if( !p ) { // no newlines before term + yylloc.first_column = (term.first - stmt.first) + 1; + p = last_newline(term.first, term.second); + gcc_assert(p); // newline must be in term + yylloc.last_column = (term.second - p) + 1; + return; + } + + const char *bol = p; // bol points to last newline before term + + yylloc.first_column = term.first - bol; + p = last_newline(term.first, term.second); + if( p ) { // term has newlines, too + yylloc.last_column = (p - term.first); + } else { + yylloc.last_column = yylloc.first_column + term.length(); + } +} + +static replacing_term_t +parse_replacing_term( const char *stmt, const char *estmt ) { + gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt < estmt); + replacing_term_t output(stmt); + + static const char pattern[] = + "^([[:space:];,]+(LEADING|TRAILING|BY))?" // 1, 2 + "[[:space:];,]+" // leading space between pairs + "(" // 3 + "(\"" "([\"]{2}|[^\"])*" "\")" // 4, 5 + "|" "('" "([']{2}|[^'])*" "')" // 6, 7 + "|" "(" "[+-]?[[:alnum:]]+([_-]+[[:alnum:]]+)*" ")" // 8, 9 + "|" "(==(" "(=?[^=]+)*" ")==)" // 10, 11, 12 + ")" + "(([[:space:];,]+[\"'=[:alnum:]+-]{1,2})" "|" "[[:space:];,]*([.]))?" // 13, 14, 15 + ; + + static regex re(pattern, extended_icase); + cmatch cm; + + if( ! regex_search( stmt, estmt, cm, re) ) return output; + + bool replacing_term = cm[2].matched && TOUPPER(cm[2].first[0]) == 'B'; + + if( cm[2].matched && ! replacing_term ) { + output.leading_trailing = cm[2]; + } + + // Apply such that quoted matches supersede word matches. + if( cm[11].matched ) output.term = cm[11]; + if( cm[ 8].matched ) output.term = cm[ 8]; + if( cm[ 6].matched ) output.term = cm[ 6]; + if( cm[ 4].matched ) output.term = cm[ 4]; + + // The matched segment extends to the end of the matched term, or to + // the dot at end of statement. Include the pseudotext ==, if found. + output.stmt = span_t(cm[0].first, output.term.pend); + if( cm[10].matched ) output.stmt.pend = cm[10].second; + + if( cm[15].matched && ISSPACE(cm[15].second[0]) ) { // matched end of statement + output.done = output.matched = true; + output.stmt = cm[0]; + gcc_assert(output.stmt.pend[-1] == '.'); + dbgmsg("%s:%d: done at '%.*s'", __func__, __LINE__, + output.term.size(), output.term.p); + return output; + } + + if( is_numeric_term(output.term) ) { + output.matched = output.stmt.p < output.term.p; + gcc_assert(output.matched); + // look for fractional part + if( is_numeric_char(*output.term.pend) && ISDIGIT(output.term.pend[1]) ) { + gcc_assert(!ISDIGIT(*output.term.pend)); + auto p = std::find_if(++output.term.pend, estmt, + []( char ch ) { return !ISDIGIT(ch); } ); + output.stmt.pend = output.term.pend = p; + output.done = '.' == output.stmt.pend[0] && ISSPACE(output.stmt.pend[1]); + if( output.done ) output.stmt.pend++; + } + dbgmsg("%s:%d: %s '%.*s'", __func__, __LINE__, + output.done? "done at" : "term is", + output.term.size(), output.term.p); + return output; + } + + if( yy_flex_debug ) { // should be looking only for words + dbgmsg("%s:%d: not done, working with '%.*s'", __func__, __LINE__, + cm[0].length(), cm[0].first); + int i=0; + for( auto m : cm ) { + if( m.matched ) + dbgmsg("%4d) '%.*s'", i, m.length(), m.first); + i++; + } + } + + if( !cm[8].matched ) { + output.matched = output.stmt.p < output.term.p; + gcc_assert(output.matched); + dbgmsg("%s:%d: term is '%.*s'", __func__, __LINE__, + output.term.size(), output.term.p); + return output; + } + + bool extraneous_replacing = 'R' == TOUPPER(cm[8].first[0]); // maybe + if( extraneous_replacing ) { // prove it + static const char replacing[] = "REPLACING"; + for( size_t i=0; i < strlen(replacing); i++ ) { + if( replacing[i] != TOUPPER(cm[8].first[i]) ) { + extraneous_replacing = false; + break; + } + } + if( extraneous_replacing ) { + update_yylloc( cm[0], cm[8] ); + yywarn("syntax error: invalid '%.*s'", cm[8].length(), cm[8].first); + output.matched = false; + return output; + } + } + + gcc_assert(cm[8].matched); + gcc_assert(0 < output.term.size()); + + dbgmsg("%s:%d: more words starting at '%.80s'", __func__, __LINE__, + output.term.pend); + + static const char term_pattern[] = + "^[[:space:]]+" + "(" "(IN|OF)[[:space:]]+" ")" // 1, 2 + "(" "[+-]?[[:alnum:]]+([$_-]+[[:alnum:]]+)*" ")" // 3, 4 + "(" "[[:space:]]*[(]" ")?" // 5 + "(([[:space:]]+[\"'=[:alnum:]+-]{1,2})" "|" "[[:space:]]*([.]))?" // 6, 7, 8 + ; + static const char paren_pattern[] = + "^[[:space:]]*" + "(" "[()][^()]*[()]" ")" // 1 + "(([[:space:]]+[\"'=[:alnum:]+-]{1,2})" "|" "[[:space:]]*([.]))?" // 2, 3, 4 + ; + + regex term_re(term_pattern, extended_icase); + regex paren_re(paren_pattern, extended_icase); + ssize_t nsub = 0; + + while( regex_search( output.term.pend, estmt, cm, term_re) ) { + output.stmt.pend = output.term.pend = cm[3].second; // found a word + if( cm[5].matched ) break; // found left parenthesis + const csub_match& done(cm[8]); + if( done.matched ) { + output.done = output.matched = output.stmt.p < output.term.p; + gcc_assert(output.done); + goto matched; + } + } + + // match subscripts, if any + while( regex_search( output.term.pend, estmt, cm, paren_re) ) { + output.stmt.pend = output.term.pend = cm[1].second; + if( cm[1].first[0] == '(' ) nsub++; + if( cm[1].first[0] == ')' ) nsub--; + if( cm[1].second[-1] == '(' ) nsub++; + if( cm[1].second[-1] == ')' ) nsub--; + + const csub_match& done(cm[4]); + if( done.matched ) { + output.matched = output.stmt.p < output.term.p; + output.stmt.pend = done.second; + output.done = output.stmt.pend[-1] == '.'; + goto matched; + } + + if( nsub == 0 ) break; + } + + matched: + output.matched = output.stmt.p < output.term.p; + + if( yy_flex_debug ) { + const char *status = "unmatched"; + if( output.matched ) status = output.done? "done" : "matched"; + dbgmsg("%s:%d: %s term is '%.*s'", __func__, __LINE__, status, + output.term.size(), output.term.p? output.term.p : ""); + } + return output; +} + +struct replacing_pair_t { + span_t leading_trailing, stmt; + replace_t replace; + + bool matched() const { return 0 < stmt.size(); } + bool done() const { return matched() && stmt.pend[-1] == '.'; } +}; +static replacing_pair_t +parse_replacing_pair( const char *stmt, const char *estmt ) { + replacing_pair_t pair; + + pair.replace = replace_t(); + auto parsed = parse_replacing_term( stmt, estmt ); // before + if( parsed.matched ) { + if( parsed.term.size() == 0 ) return pair; // failure: empty before string + pair.leading_trailing = parsed.leading_trailing; + pair.stmt = parsed.stmt; + pair.replace.before = parsed.term; + + if( !parsed.done ) { + parsed = parse_replacing_term( pair.stmt.pend, estmt ); // after + if( parsed.matched ) { + pair.stmt.pend = parsed.stmt.pend; + pair.replace.after = parsed.term; + } else { + dbgmsg("%s:%d: not matched '%.*s'", __func__, __LINE__, + pair.stmt.size(), pair.stmt.p); + } + } + if( yy_flex_debug ) { + const char *status = "unmatched"; + if( pair.matched() ) status = pair.done()? "done" : "matched"; + dbgmsg("%s:%d: [%s] replacing '%.*s' with '%.*s'", __func__, __LINE__, + status, + pair.replace.before.size(), pair.replace.before.p, + pair.replace.after.size(), pair.replace.after.p); + } + } else { + for( auto p = stmt; (p = std::find(p, estmt, '.')) < estmt; p++ ) { + if( ISSPACE(p[1]) ) { + pair.stmt = span_t(stmt, ++p); + break; + } + } + if( pair.stmt.p ) { + yywarn("CDF syntax error '%*s'", (int)pair.stmt.size(), pair.stmt.p); + } + else { + // This eliminated a compiler warning about "format-overflow" + yywarn("CDF syntax error"); + } + pair.stmt = span_t(0UL, stmt); + pair.replace = replace_t(); + } + return pair; +} + +static std::pair, char *> +parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { + std::list pairs ; + + static const char any_ch[] = "."; + static const char word_ch[] = "[[:alnum:]$_-]"; + static const char nonword_ch[] = "[^[:alnum:]\"'$_-]"; + + // Pattern to find one REPLACE pseudo-text pair + static const char replace_pattern[] = + "([[:space:]]+(LEADING|TRAILING))?" // 1, 2 + "[[:space:]]+" + "==(" "(=?[^=]+)+" ")==" // 3, 4 + "[[:space:]]+BY[[:space:]]+" + "==(" "(=?[^=]+)*" ")==" // 5, 6 + "(([[:space:]]+[\"'=[:alnum:]+-]{1,2})" "|" "[[:space:]]*([.]))?" // 7, 8, 9 + ; + + regex pair_re(replace_pattern, extended_icase); + cmatch cm; + replacing_pair_t parsed; + bool end_of_stmt = false; + + for( auto p = stmt; p < estmt && !end_of_stmt; p = parsed.stmt.pend ) { + if( is_copy_stmt ) { + parsed = parse_replacing_pair(p, estmt); + if( parsed.replace.before.size() == 0 ) break; // empty before + if( parsed.replace.after.p == NULL ) break; // invalid after + end_of_stmt = parsed.done(); + } else { + if( ! regex_search( p, estmt, cm, pair_re) ) break; + // Report findings. + if( false && yy_flex_debug ) { + for( size_t i=0; i < cm.size(); i++ ) { + dbgmsg("%s: %s %zu: '%.*s'", __func__, + cm[i].matched? "Pair" : "pair", + i, + cm[i].matched? int(cm[i].length()) : 0, + cm[i].matched? cm[i].first : ""); + } + } + gcc_assert(cm[3].matched); + gcc_assert(cm[5].matched); + parsed.leading_trailing = cm[2]; + parsed.replace.before = cm[3]; + parsed.replace.after = cm[5]; + + parsed.stmt = cm[0]; + // If not done, exclude trailing portion from statement match. + if( !parsed.done() && cm[8].matched ) { + gcc_assert(!cm[9].matched); + parsed.stmt.pend = cm[8].first; + } + } + + span_t& before(parsed.replace.before); + span_t& after(parsed.replace.after); + + const char *befter[2] = { nonword_ch, nonword_ch }; + gcc_assert(before.p < before.pend); + if( !is_word_char(before.p[0]) ) befter[0] = any_ch; + if( !is_word_char(before.pend[-1]) ) befter[1] = any_ch; + + const char *src = esc(before.size(), before.p); + + if( parsed.leading_trailing.size() > 0 ) { + switch( TOUPPER(parsed.leading_trailing.p[0]) ) { + case 'L': // leading + befter[1] = word_ch; + break; + case 'T': // trailing + befter[0] = word_ch; + break; + default: + gcc_unreachable(); + } + dbgmsg("%s:%d: dealing with %.*s", __func__, __LINE__, + int(parsed.leading_trailing.size()), parsed.leading_trailing.p); + } + + src = xasprintf("%s(%s)%s", befter[0], src, befter[1]); + + struct { span_t before, after; } output; + output.before = span_t(strlen(src), src); + output.after = after.dup(); + + gcc_assert(!before.has_nul()); + pairs.push_back( replace_t( output.before, output.after ) ); + + // COPY REPLACING matches end-of-statment here + // REPLACE matched end-of-statement in caller, and estmt[-1] == '.' + if( is_copy_stmt && parsed.stmt.pend[-1] == '.' ) break; + } + + if( yy_flex_debug ) { + dbgmsg( "%s:%d: %s: %zu pairs parsed from '%.*s'", __func__, __LINE__, + parsed.done()? "done" : "not done", + pairs.size(), parsed.stmt.size(), parsed.stmt.p ); + int i = 0; + for( const auto& replace : pairs ) { + dbgmsg("%s:%d:%4d: '%s' => '%s'", __func__, __LINE__, + ++i, replace.before.p, replace.after.p); + } + } + if( !parsed.done() ) { + pairs.clear(); + return std::make_pair(pairs, const_cast(stmt)); + } + + return std::make_pair(pairs, const_cast(parsed.stmt.pend)); +} + +struct copy_descr_t { + bool parsed; + int fd; + size_t nreplace; + span_t partial_line, erased_lines; + + copy_descr_t( const char *line, const char *eol) + : parsed(false), fd(-1), nreplace(0), partial_line(line, eol) {} +}; + +static YYLTYPE +location_in( const filespan_t& mfile, const csub_match cm ) { + YYLTYPE loc { + int(mfile.lineno() + 1), int(mfile.colno() + 1), + int(mfile.lineno() + 1), int(mfile.colno() + 1) + }; + gcc_assert(mfile.cur <= cm.first && cm.second <= mfile.eodata); + auto nline = std::count(cm.first, cm.second, '\n'); + if( nline ) { + gcc_assert(loc.first_line < nline); + loc.first_line -= nline; + auto p = static_cast(memrchr(cm.first, '\n', cm.length())); + loc.last_column = (cm.second) - p; + } + location_dump(__func__, __LINE__, "copy?", loc); + return loc; +} + +static copy_descr_t +parse_copy_directive( filespan_t& mfile ) { + static const char *most_recent_buffer; + static span_t copy_stmt(mfile.eodata, mfile.eodata); + + static const char pattern[] = + "COPY" "[[:space:]]+" + /* 1 */ "(" + /*2,3*/ "\"(" "([\"]{2}|[^\"])+" ")\"" + /*4,5*/ "|" "'(" "([']{2}|[^'])+" ")[']" + /*6,7*/ "|" "(" "[[:alnum:]]+([_-]+[[:alnum:]]+)*" ")" + /* */ ")" + /* 8 */ "(" + /* 9 */ "[[:space:]]+(OF|IN)[[:space:]]+" + /* 10*/ "(" + /*11,12*/ "(\"" "([\"]{2}|[^\"])+" "\")" + /*13,14*/ "|" "('" "([']{2}|[^'])+" "')" + /*15,16*/ "|" "(" "[[:alnum:]]+([_-]+[[:alnum:]]+)*" ")" + /* */ ")" + /* */ ")?" + /*17,18*/ "([[:space:]]+SUPPRESS([[:space:]]+PRINTING)?)?" + /*19,20 */ "(" "([[:space:]]*[.])" "|" "[[:space:]]+REPLACING" ")" + ; + + static regex re(pattern, extended_icase); + cmatch cm; + copy_descr_t outcome(mfile.cur, mfile.cur); + + // COPY appears in current buffer? + if( most_recent_buffer != mfile.data || copy_stmt.p < mfile.cur ) { + most_recent_buffer = mfile.data; + copy_stmt.p = mfile.eodata; + + if( regex_search(mfile.ccur(), + (const char *)mfile.eodata, cm, re) ) { + copy_stmt = span_t( cm[0].first, cm[0].second ); + if( yy_flex_debug ) { + size_t nnl = 1 + count_newlines(mfile.data, copy_stmt.p); + size_t nst = 1 + count_newlines(copy_stmt.p, copy_stmt.pend); + dbgmsg("%s:%d: line %zu: COPY directive is %zu lines '%.*s'", + __func__, __LINE__, + nnl, nst, copy_stmt.size(), copy_stmt.p); + } + } + } + + // If COPY appears on the current line, parse it completely this time. + if( mfile.cur <= copy_stmt.p && + copy_stmt.p < mfile.eol ) { + outcome.parsed = regex_search(copy_stmt.p, copy_stmt.pend, cm, re); + gcc_assert(outcome.parsed); + outcome.partial_line = span_t(mfile.cur, copy_stmt.p); + + if( yy_flex_debug ) { + dbgmsg("%zu expressions", std::count(pattern, pattern + sizeof(pattern), '(')); + int i = 0; + for( const auto& m : cm ) { + if( m.matched ) + dbgmsg("%s:%d: %2d: '%.*s'", __func__, __LINE__, + i, int(m.length()), m.first); + i++; + } + } + + auto& copybook_name = cm[1]; + auto& library_name = cm[10]; + + bool replacing = !cm[20].matched; + + if( library_name.matched ) { + YYLTYPE loc = location_in( mfile, library_name ); + copybook.library( loc, xstrndup(library_name.first, library_name.length()) ); + } + YYLTYPE loc = location_in( mfile, copybook_name ); + outcome.fd = copybook.open( loc, xstrndup(copybook_name.first, + copybook_name.length()) ); + if( outcome.fd == -1 ) { // let parser report missing copybook + dbgmsg("%s:%d: (no copybook '%s' found)", __func__, __LINE__, copybook.source()); + return outcome; + } + + if( replacing ) { + std::pair, char*> + result = parse_replace_pairs( cm[0].second, mfile.eodata, true ); + + std::list& replacements(result.first); + outcome.parsed = (outcome.nreplace = replacements.size()) > 0; + if( outcome.parsed ) { + replace_directives.push(replacements); + } + copy_stmt.pend = result.second; + + // Maybe we don't need these. We'll see. + for( const auto& r : replacements ) { + copybook.replacement(pseudo_e, r.before.dup().p, r.after.dup().p); + } + } + + // If the parse failed, pass it through to the parser for analysis. + if( outcome.parsed ) { + erase_line( const_cast(copy_stmt.p), + const_cast(copy_stmt.pend)); + outcome.erased_lines = copy_stmt; + } + + mfile.eol = const_cast(copy_stmt.pend); + mfile.next_line(); + } + return outcome; +} + +static char * +parse_replace_last_off( filespan_t& mfile ) { + static const char pattern[] = + "REPLACE" "[[:space:]]+" + "(LAST[[:space:]]+)?OFF[[:space:]]*[.]" + ; + static regex re(pattern, extended_icase); + cmatch cm; + + // REPLACE [LAST] OFF? + bool found = regex_search(mfile.ccur(), + (const char *)mfile.eodata, cm, re); + gcc_assert(found); // caller ensures + + gcc_assert(cm.size() == 2); + // LAST OFF removes most recent REPLACE + if( cm[1].matched ) { + gcc_assert(TOUPPER(cm[1].first[0]) == 'L'); + if( ! replace_directives.empty() ) { + replace_directives.pop(); + } + } else { // OFF clears the REPLACE stack + while( ! replace_directives.empty() ) { + replace_directives.pop(); + } + } + + dbgmsg( "%s:%d: line %zu: parsed '%.*s', ", __func__, __LINE__, + mfile.lineno(), int(cm[0].length()), cm[0].first ); + + // Remove statement from input + erase_line(const_cast(cm[0].first), + const_cast(cm[0].second)); + + return const_cast(cm[0].second); +} + +static span_t +parse_replace_text( filespan_t& mfile ) { + static const char pattern[] = + /* 0 */ "REPLACE" + /* 1 */ "([[:space:]]+ALSO)?" + /* 2 */ "(" + /*3,4*/ "([[:space:]]+(LEADING|TRAILING))?" + /* 5 */ "([[:space:]]+" + /* 6 */ "==" "(=?[^=]+)+" "==" + /* */ "[[:space:]]+BY[[:space:]]+" + /* 7 */ "==" "(=?[^=]+)*" "==" + /* */ ")" + /* */ ")+[[:space:]]*[.]" + ; + static regex re(pattern, extended_icase); + cmatch cm; + const size_t current_lineno(mfile.lineno()); + + if( false && yy_flex_debug ) { + auto pend = mfile.eol; + gcc_assert(mfile.line_length() > 2); + if( pend[-1] == '\n' ) pend -= 2; + auto len = int(pend - mfile.cur); + dbgmsg("%s:%d: line %zu: parsing '%.*s", __func__, __LINE__, + current_lineno, len, mfile.cur); + } + + if( ! regex_search(mfile.ccur(), (const char *)mfile.eodata, cm, re) ) { + dbgmsg( "%s:%d: line %zu: not a REPLACE statement:\n'%.*s'", + __func__, __LINE__, current_lineno, + int(mfile.line_length()), mfile.cur ); + return span_t(); + } + + // Report findings. + if( yy_flex_debug ) { + dbgmsg("%zu expressions", std::count(pattern, pattern + sizeof(pattern), '(')); + int i = 0; + for( const auto& m : cm ) { + if( m.matched ) + dbgmsg("%s:%d: %2d: '%.*s'", __func__, __LINE__, + i, int(m.length()), m.first); + i++; + } + } + + gcc_assert(cm.size() > 7); + + // Update active REPLACE stack + if( ! cm[1].matched ) { // ALSO pushes, else clear stack and push one. + while( !replace_directives.empty() ) { + replace_directives.pop(); + } + } else { + gcc_assert(TOUPPER(cm[1].first[0]) == 'A'); + } + + span_t replace_stmt(cm[0].first, cm[0].second); + + std::pair, char*> + result = parse_replace_pairs(replace_stmt.p, replace_stmt.pend, false); + std::list& replacements(result.first); + replace_directives.push( replacements ); + + if( yy_flex_debug ) { + dbgmsg( "%s:%d: line %zu: %zu pairs parsed from '%.*s'", __func__, __LINE__, + current_lineno, replacements.size(), int(replace_stmt.size()), replace_stmt.p ); + for( const auto& replace : replacements ) { + int i = 0; + dbgmsg("%s:%d:%4d: '%s' => '%s'", __func__, __LINE__, + ++i, replace.before.p, replace.after.p); + } + } + + // Remove statement from input + erase_line(const_cast(replace_stmt.p), + const_cast(replace_stmt.pend)); + + return replace_stmt; +} + +static span_t +parse_replace_directive( filespan_t& mfile ) { + static const char *most_recent_buffer, *next_directive; + static bool off_coming_up; + static const char pattern[] = + "REPLACE" "[[:space:]]+" "(LAST|OFF|ALSO|LEADING|TRAILING|==)"; + + static regex re(pattern, extended_icase); + cmatch cm; + + // REPLACE appears in current buffer? + if( most_recent_buffer != mfile.data || next_directive < mfile.cur ) { + most_recent_buffer = mfile.data; + next_directive = mfile.eodata; + + if( regex_search(mfile.ccur(), + (const char *)mfile.eodata, cm, re) ) { + gcc_assert(cm[1].matched); + next_directive = cm[0].first; + + switch( TOUPPER(cm[1].first[0]) ) { + case 'L': + off_coming_up = 'A' == TOUPPER(cm[1].first[1]); // LAST OFF, else LEADING + break; + case 'O': // OFF + off_coming_up = true; + break; + case 'A': case 'T': case '=': // [ALSO] [ eading/Trailing] == ... + off_coming_up = false; + break; + default: + gcc_unreachable(); + } + } + } + + span_t erased; + // REPLACE appears on current line? + if( mfile.cur <= next_directive && + next_directive < mfile.eol ) { + if( off_coming_up ) { + parse_replace_last_off(mfile); + } else { + erased = parse_replace_text(mfile); + } + } + return erased; +} + +/* + * Maintain the number of newlines by counting those that will be + * overwritten, and appending them to the appended line. Return the + * new EOL pointer. + * + * The newlines accumulate past eodata, at the start of the blank + * lines created by the caller. + */ +char * +bytespan_t::append( const char *input, const char *eoinput ) { + gcc_assert(data < eodata); + +#define LEXIO 0 +#if LEXIO + auto nq = std::count_if(data, eodata, isquote); + dbgmsg("%s:%3d: input ------ '%.*s'", __func__, __LINE__, int(eoinput - input), input); + dbgmsg("%s:%3d: precondition '%.*s' (%zu: %s)", __func__, __LINE__, + int(size()), data, nq, in_string()? "in string" : "not in string"); +#endif + if( !in_string() ) { // Remove trailing space unless it's part of a literal. + while(data < eodata && ISSPACE(eodata[-1])) eodata--; + gcc_assert(ISSPACE(eodata[0])); + gcc_assert(data == eodata || !ISSPACE(eodata[-1])); + } + // skip leading blanks + while( input < eoinput && ISSPACE(*input) ) input++; + if( isquote(*input) ) input++; + + size_t len = eoinput - input; + char * pend = eodata + len; + + int nnl = std::count(eodata, pend, '\n'); // newlines to be overwritten + gcc_assert(0 == std::count(input, eoinput, '\n')); // newlines in input + + memmove(eodata, input, len); + nnl += std::count(pend, pend + nnl, '\n'); // other newlines to be overwritten + std::fill(pend, pend + nnl, '\n'); + + eodata = pend; + +#if LEXIO + dbgmsg("%s:%3d: postcondition '%.*s'", __func__, __LINE__, int(size() + len) - 1, data); +#endif + + return eodata; +} + +const char * cobol_filename(); + +static filespan_t& +mapped_file( FILE *input ) { + static std::map inputs; + + int fd = fileno(input); + gcc_assert(fd > 0); + + filespan_t& mfile = inputs[fd]; + if( mfile.data ) { + return mfile; + } + + struct stat sb; + if( 0 != fstat(fd, &sb) ) { + cbl_err( "%s: could not stat fd %d", __func__, fd ); + } + + mfile.use_nada(); + + if( sb.st_size > 0 ) { + static const int flags = MAP_PRIVATE; + + void *p = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, flags, fd, 0); + if( p == MAP_FAILED ) { + cbl_err( "%s: could not map fd %d", __func__, fd ); + } + + mfile.lineno_reset(); + mfile.data = mfile.cur = mfile.eol = mfile.eodata = static_cast(p); + mfile.eodata += sb.st_size; + } + return mfile; +} + +char filespan_t::empty_file[8] = " \n"; + +static void unmap_file( filespan_t& mfile ) { + if( ! mfile.nada() ) { + munmap(mfile.data, mfile.size() - 1); + } + mfile = filespan_t(); +} + +extern int yylineno; + +static void +print_lexer_input( const char *buf, const char *ebuf ) { + const char *eol, *lexio = getenv("lexio"); + int i; + static int nbuf = 1; + static FILE *output = NULL; + + if( !lexio ) return; + if( !output ) { + output = fopen( lexio, "w" ); + if( !output ) output = stderr; + } + + fprintf( output, "*> buffer %d\n", nbuf ); + for( i = 0, eol = std::find(buf, ebuf, '\n'); + eol != ebuf; buf = eol, eol = std::find(buf, ebuf, '\n'), i++ ) { + eol++; + fprintf( output, "%5d %.*s", yylineno + i, int(eol - buf), buf ); + } + if( buf < ebuf ) { + fprintf( output, "%5d %.*s", yylineno + i, int(eol - buf), buf ); + } + fprintf( output, "*> endbuf %d\n", nbuf++ ); + fflush(output); +} + +/* + * Fill about as much of the lexer's buffer as possible, except skip + * leading blanks on blank lines. + */ +int +lexer_input( char buf[], int max_size, FILE *input ) { + filespan_t& mfile( mapped_file(input) ); + + if( mfile.cur == mfile.eodata ) { + if( mfile.cur ) unmap_file(mfile); + return 0; + } + + gcc_assert( mfile.data <= mfile.cur && mfile.cur < mfile.eodata ); + + char *next = std::min(mfile.eodata, mfile.cur + max_size); + buffer_t output(buf, buf + max_size); // initializes pos + + // Fill output, keeping only NL for blank lines. + for( auto p = mfile.cur; p < next; *output.pos++ = *p++ ) { + static bool at_bol = false; + if( at_bol ) { + auto nonblank = std::find_if( p, next, + []( char ch ) { + return !isblank(ch); } ); + if( nonblank + 1 < next ) { + if( *nonblank == '\r' ) nonblank++; // Windows + if( *nonblank == '\n' ) { + p = nonblank; + continue; + } + } + } + at_bol = *p == '\n'; + } + + gcc_assert( output.pos <= output.eodata ); + output.eodata = output.pos; + + mfile.cur = next; + gcc_assert(mfile.cur <= mfile.eodata); + + // Buffer full or input exhausted. + print_lexer_input(output.data, output.eodata); + + return output.size(); +} + +static const char * +find_filter( const char filter[] ) { + + if( 0 == access(filter, X_OK) ) { + return filter; + } + + const char *path = getenv("PATH"); + if( ! path ) return NULL; + char *p = xstrdup(path), *eopath = p + strlen(p); + + while( *p != '\0' ) { + auto pend = std::find( p, eopath, ':' ); + if( *pend == ':' ) *pend++ = '\0'; + + char *name = xasprintf( "%s/%s", p, filter ); + + if( 0 == access(name, X_OK) ) { + return name; + } + p = pend; + } + return NULL; +} + +bool verbose_file_reader = false; + +typedef std::pair > preprocessor_filter_t; +static std::list preprocessor_filters; +static std::list included_files; + +/* + * Keep a list of files added with -include on the command line. + */ +bool +include_file_add(const char filename[]) { + struct stat sb; + if( -1 == stat(filename, &sb) ) return false; + included_files.push_back(filename); + return true; +} + +bool +preprocess_filter_add( const char input[] ) { + char filter[ strlen(input) + 1 ]; + strcpy(filter, input); + char *optstr = strchr(filter, ','); + std::list options; + + if( optstr ) { + for( char *opt = optstr + 1; (opt = strtok(opt, ",")); opt = NULL ) { + options.push_back(opt); + } + *optstr = '\0'; + } + + auto filename = find_filter(filter); + if( !filename ) { + yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter); + return false; + } + preprocessor_filters.push_back( std::make_pair(xstrdup(filename), options) ); + return true; +} + +void +cdftext::echo_input( int input, const char filename[] ) { + int fd; + if( -1 == (fd = dup(input)) ) { + yywarn( "could not open preprocessed file %s to echo to standard output", + filename ); + return; + } + + auto mfile = map_file(fd); + + if( -1 == write(STDOUT_FILENO, mfile.data, mfile.size()) ) { + yywarn( "could not write preprocessed file %s to standard output", + filename ); + } + if( -1 == munmap(mfile.data, mfile.size()) ) { + yywarn( "could not release mapped file" ); + } + if( -1 == close(fd) ) { + yywarn( "could not close mapped file" ); + } +} + +static inline ino_t +inode_of( int fd ) { + struct stat sb; + if( -1 == fstat(fd, &sb) ) { + cbl_err( "could not stat fd %d", fd); + } + return sb.st_ino; +} + +FILE * +cdftext::lex_open( const char filename[] ) { + int input = open_input( filename ); + if( input == -1 ) return NULL; + + int output = open_output(); + + // Process any files supplied by the -include comamnd-line option. + for( auto name : included_files ) { + int input; + if( -1 == (input = open(name, O_RDONLY)) ) { + yyerrorvl(1, "", "cannot open -include file %s", name); + continue; + } + cobol_filename(name, inode_of(input)); + filespan_t mfile( free_form_reference_format( input ) ); + + process_file( mfile, output ); + } + + cobol_filename(filename, inode_of(input)); + filespan_t mfile( free_form_reference_format( input ) ); + + process_file( mfile, output ); + + if( lexer_echo() ) { + echo_input(output, filename); + } + + for( auto filter_pair : preprocessor_filters ) { + input = output; + output = open_output(); + + char *filter = filter_pair.first; + std::list& options = filter_pair.second; + + char * argv[2 + options.size()] = { filter }; + + auto last_argv = std::transform( options.begin(), options.end(), argv + 1, + []( std::string& opt ) { + return xstrdup(opt.c_str()); + } ); + *last_argv = NULL; + + pid_t pid = fork(); + + switch(pid){ + case -1: cbl_err( "%s", __func__); + break; + case 0: // child + if( -1 == dup2(input, STDIN_FILENO) ) { + cbl_err( "%s: could not dup input", __func__); + } + if( -1 == dup2(output, STDOUT_FILENO) ) { + cbl_err( "%s: could not dup output", __func__); + } + if( -1 == lseek(STDIN_FILENO, SEEK_SET, 0) ) { + cbl_err( "%s: could not seek to start of file", __func__); + } + int erc; + if( -1 == (erc = execv(filter, argv)) ) { + yywarn("could not execute %s", filter); + } + _exit(erc); + } + int status; + auto kid = wait(&status); + gcc_assert(pid == kid); + if( kid == -1 ) cbl_err( "failed waiting for pid %d", pid); + + if( WIFSIGNALED(status) ) { + cbl_errx( "%s pid %d terminated by %s", + filter, kid, strsignal(WTERMSIG(status)) ); + } + if( WIFEXITED(status) ) { + if( (status = WEXITSTATUS(status)) != 0 ) { + cbl_errx( "%s exited with status %d", + filter, status); + } + } + yywarn( "applied %s", filter ); + } + + return fdopen( output, "r"); +} + +int +cdftext::open_input( const char filename[] ) { + int fd = open(filename, O_RDONLY); + if( fd == -1 ) { + dbgmsg( "could not open '%s': %m", filename ); + } + + verbose_file_reader = NULL != getenv("GCOBOL_TEMPDIR"); + + if( verbose_file_reader ) { + yywarn("verbose: opening %s for input", filename); + } + return fd; +} + +int +cdftext::open_output() { + char *name = getenv("GCOBOL_TEMPDIR"); + int fd; + + if( name && 0 != strcmp(name, "/") ) { + char * stem = xasprintf("%sXXXXXX", name); + if( -1 == (fd = mkstemp(stem)) ) { + cbl_err( "could not open temporary file '%s' (%s)", + name, realpath(name, stem)); + } + return fd; + } + + FILE *fh = tmpfile(); + if( !fh ) { + cbl_err("could not create temporary file"); + } + + return fileno(fh); +} + +filespan_t +cdftext::map_file( int fd ) { + gcc_assert(fd > 0); + + filespan_t mfile; + mfile.use_nada(); + + struct stat sb; + do { + if( 0 != fstat(fd, &sb) ) { + cbl_err( "%s: could not stat fd %d", __func__, fd ); + } + if( S_ISFIFO(sb.st_mode) ) { + // Copy FIFO to regular file that can be mapped. + int input = open_output(); + std::swap(fd, input); // fd will continue to be the input + static char block[4096 * 4]; + ssize_t n; + while( (n = read(input, block, sizeof(block))) != 0 ) { + ssize_t nout = write(fd, block, n); + if( nout != n ) { + cbl_err( "%s: could not prepare map file from FIFO %d", + __func__, input); + } + if( false ) dbgmsg("%s: copied %ld bytes from FIFO", + __func__, nout); + } + } + } while( S_ISFIFO(sb.st_mode) ); + + if( sb.st_size > 0 ) { + static const int flags = MAP_PRIVATE; + + void *p = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, flags, fd, 0); + if( p == MAP_FAILED ) { + cbl_err( "%s: could not map fd %d", __func__, fd ); + } + + mfile.lineno_reset(); + mfile.data = mfile.cur = mfile.eol = mfile.eodata = static_cast(p); + mfile.eodata += sb.st_size; + } + + return mfile; +} + +bool lexio_dialect_mf(); + +filespan_t +cdftext::free_form_reference_format( int input ) { + filespan_t source_buffer = map_file(input); + filespan_t mfile(source_buffer); + + /* + * current_line_t describes the segment of mapped file that is the + * "current line" being processed. Its only use is for line + * continuation, whether string literals or not. + */ + struct current_line_t { + size_t lineno; + bytespan_t line; + // construct with length zero + current_line_t( char data[] ) : lineno(0), line(data, data) {} + } current( mfile.data ); + + /* + * If the format is not explicitly set on the command line, test the + * first 6 bytes of the first file to determine the format + * heuristically. If the first 6 characters are only digits or + * blanks, then the file is in fixed format. + */ + + if( indicator.inference_pending() ) { + const char *p = mfile.data; + while( p < mfile.eodata ) { + const char * pend = + std::find(p, const_cast(mfile.eodata), '\n'); + if( 6 < pend - p ) break; + p = pend; + if( p < mfile.eodata) p++; + } + if( valid_sequence_area(p, mfile.eodata) ) indicator.column = 7; + + dbgmsg("%s:%d: %s format detected", __func__, __LINE__, + indicator.column == 7? "FIXED" : "FREE"); + } + + while( mfile.next_line() ) { + check_source_format_directive(mfile); + remove_inline_comment(mfile.cur, mfile.eol); + + if( mfile.is_blank_line() ) continue; + + char *indcol = indicated(mfile.cur, mfile.eol); // true only for fixed + // // format + + if( is_fixed_format() && !indcol ) { // short line + erase_source(mfile.cur, mfile.eol); + } + + if( indcol ) { + // Set to blank columns 1-6 and anything past the right margin. + erase_source(mfile.cur, indcol); + if( is_reference_format() ) { + if( mfile.cur + right_margin() < mfile.eol ) { + auto p = std::find(mfile.cur + right_margin(), mfile.eol, '\n'); + erase_source(mfile.cur + right_margin(), p); + } + } + + switch( TOUPPER(*indcol) ) { + case '-': + gcc_assert(0 < current.line.size()); + /* + * The "current line" -- the line being continued -- may be many + * lines earlier (with many intervening newlines) or may intrude + * on its succeeding line. Erase the continuation line. + */ + { + char *pend = mfile.eol; + if( right_margin() ) { + pend = std::min(mfile.cur + right_margin(), mfile.eol); + } + // The appended segment has no newline because the erased line retains + // one. + pend = std::find(indcol + 1, pend, '\n'); + char *p = current.line.append(indcol + 1, pend ); + if( (p = std::max(p, mfile.cur)) < mfile.eol ) { + erase_source(p, mfile.eol); + } + } + continue; + case SPACE: + break; + case 'D': + /* + * Pass the D to the lexer, because WITH DEBUGGING MODE is + * parsed in the parser. This assumes too strict a rule: that + * all the source is in one format. In fact, DEBUGGING MODE + * could be set on, and >>SOURCE-FORMAT can switch back and + * forth. To solve that, we'd have to parse WITH DEBUGGING MODE + * in free_form_reference_format(), which is a lot of work for + * an obsolete feature. + */ + break; + case '*': + case '/': + if( indcol < mfile.eol - 1 ) { + erase_source(indcol, mfile.eol); + } + continue; + case '$': + if( lexio_dialect_mf() ) { + break; + } + __attribute__ ((fallthrough)); + default: // flag other characters in indicator area + if( ! ISSPACE(indcol[0]) ) { + yyerrorvl( mfile.lineno(), cobol_filename(), + "error: stray indicator '%c' (0x%x): \"%.*s\"", + indcol[0], indcol[0], + int(mfile.line_length() - 1), mfile.cur ); + *indcol = SPACE; + } + break; + } + } + current.line.update(mfile.cur, mfile.eol, right_margin()); + current.lineno = mfile.lineno(); + } // next line + + return source_buffer; +} + +/* + * process_file is a recursive routine that opens and processes + * included files. It uses the input file stack in two ways: to check + * copybook uniqueness, and (via the lexer) to keep track filenames + * and line numbers. + * + * When reading copybook files, the copybook object enforces the rule + * that no copybook may include itself, even indirectly. It does that + * by relying on the unique_stack to deny a push. Because the reader + * makes no attempt to count lines, line numbers in the input stack + * are all 1 at this point. + * + * When returning from the top-level recursion, the input stack has + * the original file's name on top, with depth 1. At that point, the + * lexer begins tokenizing the input. + * + * The input stream sent to the lexer is delimited by #FILE tokens + * denoting the source filename. As far as the lexer is concerned, + * there's only ever one file: the name passed to lex_open() when we + * kicked things off. But messages and the debugger need to know + * which file and line each statment appeared in. + * + * The lexer uses the input stack to keep track of names and + * numbers. The top of the input file stack is the current file + * context, initially set to line 1. When the lexer sees a push, it + * updates the top-of-stack with the current line number, yylineno, + * and then pushes the copybook filename with line 1. When it sees a + * pop, the current file is popped, of course; its line number no + * longer matters. Then the top-of-stack is used to update the current + * cobol filename and yylineno. + */ +void +cdftext::process_file( filespan_t mfile, int output, bool second_pass ) { + static size_t nfiles = 0; + std::list replacements; + + __gnu_cxx::stdio_filebuf outbuf(fdopen(output, "w"), std::ios::out); + std::ostream out(&outbuf); + std::ostream_iterator ofs(out); + + // indicate current file + static const char file_push[] = "\f#FILE PUSH ", file_pop[] = "\f#FILE POP\f"; + + if( !second_pass && nfiles++ ) { + static const char delimiter[] = "\f"; + const char *filename = cobol_filename(); + std::copy(file_push, file_push + strlen(file_push), ofs); + std::copy(filename, filename + strlen(filename), ofs); + std::copy(delimiter, delimiter + strlen(delimiter), ofs); + out.flush(); + } + + // parse CDF directives + while( mfile.next_line() ) { + yylloc = mfile.as_location(); + auto copied = parse_copy_directive(mfile); + if( copied.parsed && copied.fd != -1 ) { + gcc_assert(copied.erased_lines.p); + std::copy_if(copied.erased_lines.p, copied.erased_lines.pend, ofs, + []( char ch ) { return ch == '\n'; } ); + struct { int in, out; filespan_t mfile; } copy; + dbgmsg("%s:%d: line %zu, opening %s on fd %d", __func__, __LINE__, + mfile.lineno(), + copybook.source(), copybook.current()->fd); + copy.in = copybook.current()->fd; + copy.mfile = free_form_reference_format( copy.in ); + + if( copied.partial_line.size() ) { + std::copy(copied.partial_line.p, copied.partial_line.pend, ofs); + } + out.flush(); + + if( copied.nreplace == 0 ) { + // process with extant REPLACE directive + process_file(copy.mfile, output); + } else { + copy.out = open_output(); + // process to intermediate, applying COPY ... REPLACING + process_file(copy.mfile, copy.out); + copy.mfile = map_file(copy.out); + replace_directives.pop(); + // process intermediate with extant REPLACE directive + process_file(copy.mfile, output, true); + // COPY statement is erased from input if processed successfully + } + cobol_filename_restore(); + } + + auto erased = parse_replace_directive(mfile); + if( erased.p ) { + std::copy_if( erased.p, erased.pend, ofs, + []( char ch ) { return ch == '\n'; } ); + } + if( replace_directives.empty() ) { + std::copy(mfile.cur, mfile.eol, ofs); + continue; // No active REPLACE directive. + } + + std::list segments = segment_line(mfile); // no replace yields + // // 1 segment + + for( const auto& segment : segments ) { + std::copy(segment.p, segment.pend, ofs); + } + + if( segments.size() == 2 ) { + struct { + size_t before, after; + int delta() const { return before - after; } } nlines; + nlines.before = std::count(segments.front().p, + segments.front().pend, '\n'); + nlines.after = std::count(segments.back().p, segments.back().pend, '\n'); + if( nlines.delta() < 0 ) { + yywarn("line %zu: REPLACED %zu lines with %zu lines, " + "line count off by %d", mfile.lineno(), + nlines.before, nlines.after, nlines.delta()); + } + int nnl = nlines.delta(); + while( nnl-- > 0 ) { + static const char nl[] = "\n"; + std::copy(nl, nl + 1, ofs); + } + } + out.flush(); + } + // end of file + if( !second_pass && --nfiles ) { + std::copy(file_pop, file_pop + strlen(file_pop), ofs); + out.flush(); + } +} + +std::list +cdftext::segment_line( filespan_t& mfile ) { + std::list output; + + gcc_assert( ! replace_directives.empty() ); + std::list pending; + recognize_replacements( mfile, pending ); + + if( pending.empty() ) { + output.push_back( span_t(mfile.cur, mfile.eol) ); + return output; + } + + for( const replace_t& segment : pending ) { + gcc_assert(mfile.cur <= segment.before.p); + gcc_assert(segment.before.pend <= mfile.eodata); + + output.push_back( span_t(mfile.cur, segment.before.p) ); + output.push_back( span_t(segment.after.p, segment.after.pend ) ); + + mfile.cur = const_cast(segment.before.pend); + } + + if( mfile.eol < mfile.cur ) { + if( (mfile.eol = std::find(mfile.cur, mfile.eodata, '\n')) < mfile.eodata ) { + mfile.eol++; + } + } + + // last segment takes to EOL + output.push_back( span_t(mfile.cur, mfile.eol) ); + + return output; +} + +//////// End of the cdf_text.h file diff --git a/gcc/cobol/lexio.h b/gcc/cobol/lexio.h new file mode 100644 index 00000000000..cf7f53a3c5b --- /dev/null +++ b/gcc/cobol/lexio.h @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include + +#ifndef _LEXIO_H_ +#define _LEXIO_H_ + +#define SPACE ' ' + +bool lexer_echo(); + +bool is_reference_format(); + +static inline bool isquote( char ch ) { + return ch == '\'' || ch == '"'; +} + +static inline void +erase_source( char *src, char *esrc ) { + std::replace_if(src, esrc, + [](char ch) { return ch != '\n'; }, + SPACE ); +} + +/* + * Column number as in Cobol, with 1 at the start of the line. + * 0: free-format, but comment lines may start with '*'. + * N: columns less than N treated as space. + * '/' or '*' in N starts a comment + * 'D' starts a debug line + * '-' is a line-continuation indicator + * Others ignored. + * Right margin is enforced if it is greater than left margin. + */ +struct bytespan_t { + char *data, *eodata; + + bytespan_t( char *data = NULL, char *eodata = NULL ) + : data(data), eodata(eodata) + { + if( eodata < data ) { + this->eodata = data + strlen(data); + } + assert( this->data <= this->eodata ); + } + size_t size() const { return eodata - data; } + + bool in_string( ) const { + char open = '\0'; + + for( char *q = data; (q = std::find_if(q, eodata, isquote)) != eodata; q++) { + if( !open ) { + open = *q; // first quote opens + continue; + } + if( open == *q && q + 1 < eodata && q[0] == q[1] ) { // doubled + q++; + continue; + } + if( open == *q ) open = '\0'; // closing quote must match + } + return isquote(open); + } + + char * append( const char *input, const char *eoinput ); + + bytespan_t& + update( char *line, char *eoline, size_t right_margin ) { + *this = bytespan_t(line, eoline); + if( right_margin && data + right_margin < eodata ) { + erase_source(data + right_margin, eodata); + eodata = data + right_margin; + } + eodata = std::find(data, eodata, '\n'); + return *this; + } +}; + +/* Location type. Borrowed from parse.h as generated by Bison. */ +#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED +typedef struct YYLTYPE YYLTYPE; +struct YYLTYPE +{ + int first_line; + int first_column; + int last_line; + int last_column; +}; +# define YYLTYPE_IS_DECLARED 1 +# define YYLTYPE_IS_TRIVIAL 1 +#endif + +struct filespan_t : public bytespan_t { + char *cur, *eol, *quote; + private: + size_t iline, icol; + size_t line_quote72; + static char empty_file[8]; + public: + filespan_t() + : cur(data), eol(data), quote(NULL), iline(0), icol(0), line_quote72(0) + {} + filespan_t(void *p, size_t len) + : bytespan_t( static_cast(p), static_cast(p) + len ) + , cur(data), eol(data), quote(NULL), iline(0), line_quote72(0) + {} + + size_t lineno() const { return iline; } + size_t colno() const { return icol; } + + void lineno_reset() { iline = 0; } + size_t colno( size_t icol ) { return this->icol = icol; } + + bool nada() const { return data == empty_file; } + void use_nada() { + assert(!data); + cur = eol = data = empty_file; + eol = eodata = empty_file + sizeof(empty_file) - 1; + } + + const char *ccur() const { return cur; } + + /* + * "If an alphanumeric or national literal that is to be continued on + * the next line has as its last character a quotation mark in + * column 72, the continuation line must start with two consecutive + * quotation marks." + */ + bool was_quote72() const { return iline == line_quote72 + 1; } + + size_t next_line() { + // Before advancing, mark the current line as ending in a quote, if true. + if( is_reference_format() && 72 <= line_length() ) { + if( isquote(cur[71]) ) { line_quote72 = iline; } + } + + cur = eol; + assert(data <= cur && cur <= eodata); + if( cur == eodata ) return 0; + + eol = std::find(cur, eodata, '\n'); + + if( eol < eodata ) { + ++eol; + ++iline; + icol = 0; + } + return eol - cur; + } + + size_t line_length() const { return eol - cur; } + + static size_t tab_check( const char *src, const char *esrc ); + + bool is_blank_line() const { + auto p = std::find_if( cur, eol, []( char ch ) { return !fisspace(ch); } ); + return p == eol; + } + + YYLTYPE as_location() const { + YYLTYPE loc; + + loc.first_line = loc.last_line = 1 + iline; + loc.first_column = loc.last_column = 1 + icol; + return loc; + } + +}; + +#if USE_STD_REGEX +# include +#else +# include "dts.h" +using dts::csub_match; +using dts::cmatch; +using dts::regex; +using dts::regex_search; +#endif + +struct span_t { + protected: + void verify() const { + if( !p ) { + dbgmsg("span_t::span_t: p is NULL"); + } else if( ! (p <= pend) ) { + dbgmsg("span_t::span_t: p %p > pend %p", p, pend); + } + assert(p && p <= pend); + } + span_t& trim() { + while( p < pend && isblank(p[0]) ) p++; + while( p < pend - 1 && isblank(pend[-1]) ) pend--; + return *this; + } + public: + const char *p, *pend; + span_t() : p(NULL), pend(NULL) {} + + span_t( size_t len, const char *data ) : p(data), pend(data + len) { + verify(); + } + span_t( const char *data, const char *eodata ) : p(data), pend(eodata) { + verify(); + } + span_t& operator=( const csub_match& cm ) { + p = cm.first; + pend = cm.second; + return p && pend ? trim() : *this; + } + + int size() const { return pend - p; } + + span_t dup() const { + auto output = new char[size() + 1]; + auto eout = std::copy(p, pend, output); + *eout = '\0'; + return span_t(output, eout); + } + const char * has_nul() const { + auto p = std::find(this->p, pend, '\0'); + return p != pend? p : NULL; + } +}; + +struct replace_t { + struct span_t before, after; + replace_t( span_t before = span_t(), + span_t after = span_t() ) + : before(before), after(after) + {} + replace_t& reset() { + before = after = span_t(); + return *this; + } +}; + +#include +#include + +class cdftext { + static filespan_t free_form_reference_format( int fd ); + static void process_file( filespan_t, int output, bool second_pass = false ); + + static filespan_t map_file( int fd ); + + static void echo_input( int input, const char filename[] ); + + static int open_input( const char filename[] ); + static int open_output(); + + static std::list segment_line( filespan_t& mfile ); + + public: + static FILE * lex_open( const char filename[] ); +}; + +std::list free_form_reference_format( filespan_t mfile ); + +#endif diff --git a/gcc/cobol/parse.y b/gcc/cobol/parse.y new file mode 100644 index 00000000000..15dbd1cff32 --- /dev/null +++ b/gcc/cobol/parse.y @@ -0,0 +1,13107 @@ +/* + * Copyright (c) 2021-2025 Symas Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the Symas Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +%code requires { + #include // Before cobol-system because it uses poisoned functions + #include "cobol-system.h" + #include + #include + #include + #include "io.h" + #include "ec.h" + +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + + enum radix_t { + decimal_e = 10, + hexadecimal_e = 16, + boolean_e = 2, + }; + + enum accept_func_t { + accept_done_e, + accept_command_line_e, + accept_envar_e, + }; + + class literal_t { + size_t isym; + public: + char prefix[3]; + size_t len; + char *data; + + bool empty() const { return data == NULL; } + size_t isymbol() const { return isym; } + const char * symbol_name() const { + return isym? cbl_field_of(symbol_at(isym))->name : ""; + } + + literal_t& + set( size_t len, char *data, const char prefix[] ) { + set_prefix(prefix, strlen(prefix)); + set_data(len, data); + return *this; + } + + literal_t& + set( const cbl_field_t * field ) { + assert(field->has_attr(constant_e)); + assert(is_literal(field)); + + set_prefix( "", 0 ); + set_data( field->data.capacity, + const_cast(field->data.initial), + field_index(field) ); + return *this; + } + literal_t& + set_data( size_t len, char *data, size_t isym = 0 ) { + this->isym = isym; + this->len = len; + this->data = data; + if( this->prefix[0] == 'Z' ) { + this->data = new char[++this->len]; + auto p = std::copy(data, data + len, this->data); + *p = '\0'; + } + return *this; + } + literal_t& + set_prefix( const char *input, size_t len ) { + assert(len < sizeof(prefix)); + std::fill(prefix, prefix + sizeof(prefix), '\0'); + std::transform(input, input + len, prefix, toupper); + return *this; + } + bool + compatible_prefix( const literal_t& that ) const { + if( prefix[0] != that.prefix[0] ) { + return prefix[0] != 'N' && that.prefix[0] != 'N'; + } + return true; + } + }; + + struct acrc_t { // Abbreviated combined relation condition + cbl_refer_t *term; + relop_t op; + bool invert; + acrc_t& init( cbl_refer_t *term = NULL, + relop_t op = relop_t(-1), + bool invert = false ) + { + this->term = term; + this->op = op; + this->invert = invert; + return *this; + } + static acrc_t make( cbl_refer_t *term = NULL, + relop_t op = relop_t(-1), + bool invert = false ) + { + acrc_t output; + return output.init( term, op, invert ); + } + relop_t relop_from( relop_t ante_op ) const { + assert(ante_op != -1); + return op != -1? op : ante_op; + } + bool is_relation_condition() const { return term && term->field; } + }; + typedef std::list acrcs_t; + + enum data_category_t { data_category_none, + data_category_all, + data_alphabetic_e, + data_alphanumeric_e, + data_alphanumeric_edited_e, + data_boolean_e, + data_data_pointer_e, + data_function_pointer_e, + data_msg_tag_e, + data_dbcs_e, + data_egcs_e, + data_national_e, + data_national_edited_e, + data_numeric_e, + data_numeric_edited_e, + data_object_referenc_e, + data_program_pointer_e, + }; + + const char * data_category_str( data_category_t category ); + + typedef std::map category_map_t; + + struct substitution_t { + enum subst_fl_t { subst_all_e, subst_first_e = 'F', subst_last_e = 'L' }; + bool anycase; + subst_fl_t first_last; + cbl_refer_t *orig, *replacement; + + substitution_t& init( bool anycase, char first_last, + cbl_refer_t *orig, cbl_refer_t *replacement ) { + this->anycase = anycase; + switch(first_last) { + case 'F': this->first_last = subst_first_e; break; + case 'L': this->first_last = subst_last_e; break; + default: + this->first_last = subst_all_e; + break; + } + this->orig = orig; + this->replacement = replacement; + return *this; + } + }; + typedef std::list substitutions_t; + + struct init_statement_t { + bool to_value; + data_category_t category; + category_map_t replacement; + + init_statement_t( category_map_t replacement ) + : to_value(false) + , category(data_category_none) + , replacement(replacement) + + {} + + init_statement_t( bool to_value = false ) + : to_value(to_value) + , category(data_category_none) + , replacement(category_map_t()) + {} + + }; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" + static data_category_t + data_category_of( const cbl_refer_t& refer ); + + static _Float128 + numstr2i( const char input[], radix_t radix ); + + struct cbl_field_t; + static inline cbl_field_t * + new_literal( const char initial[], enum radix_t radix ); +#pragma GCC diagnostic pop + + + #include + + enum select_clause_t { + access_clause_e = 0x0001, + alt_key_clause_e = 0x0002, + assign_clause_e = 0x0004, + collating_clause_e = 0x0008, + file_status_clause_e = 0x0010, + lock_mode_clause_e = 0x0020, + organization_clause_e = 0x0040, + padding_clause_e = 0x0080, + record_delim_clause_e = 0x0100, + record_key_clause_e = 0x0200, + relative_key_clause_e = 0x0400, + reserve_clause_e = 0x0800, + sharing_clause_e = 0x1000, + }; + + struct symbol_elem_t; + struct symbol_elem_t * symbols_begin( size_t first ); + struct symbol_elem_t * symbols_end(); + + void field_done(); + + template + struct Elem_list_t { + std::list elems; + Elem_list_t() {} + Elem_list_t( E elem ) { + elems.push_back(elem); + } + Elem_list_t * push_back( E elem ) { + elems.push_back(elem); + return this; + } + void clear() { + for( auto p = elems.begin(); p != elems.end(); p++ ) { + assert( !(symbols_begin(0) <= *p && *p < symbols_end()) ); + delete *p; + } + elems.clear(); + } + }; + + struct file_list_t; + struct cbl_label_t; + typedef struct Elem_list_t Label_list_t; + + struct cbl_file_key_t; + typedef struct Elem_list_t key_list_t; + + struct cbl_declarative_t; + typedef struct Elem_list_t declarative_list_t; + typedef struct Elem_list_t ec_list_t; + typedef struct Elem_list_t isym_list_t; + + struct rel_part_t; + + bool set_debug(bool); + +#include "ec.h" +#include "common-defs.h" +#include "inspect.h" +} + +%{ +#include // Before cobol-system because it uses poisoned functions +#include "cobol-system.h" +#include "cdfval.h" +#include "ec.h" +#include "common-defs.h" +#include "util.h" +#include "cbldiag.h" +#include "symbols.h" +#include "inspect.h" +#include "io.h" +#include "genapi.h" +#include "exceptl.h" +#include "exceptg.h" +#include "parse_ante.h" +%} + +%token IDENTIFICATION_DIV "IDENTIFICATION DIVISION" + ENVIRONMENT_DIV "ENVIRONMENT DIVISION" + PROCEDURE_DIV "PROCEDURE DIVISION" + DATA_DIV "DATA DIVISION" + FILE_SECT "FILE SECTION" + INPUT_OUTPUT_SECT "INPUT-OUTPUT SECTION" + LINKAGE_SECT "LINKAGE SECTION" + LOCAL_STORAGE_SECT "LOCAL-STORAGE SECTION" + WORKING_STORAGE_SECT "WORKING-STORAGE SECTION" + +%token OBJECT_COMPUTER "OBJECT COMPUTER" + +%token DISPLAY_OF "DISPLAY OF" + END_FUNCTION "END FUNCTION" + END_PROGRAM "END PROGRAM" + END_SUBPROGRAM "END PROGRAM " + +%token JUSTIFIED RETURNING NO_CONDITION "invalid token" + +%token ALNUM ALPHED +%token ERROR EXCEPTION SIZE_ERROR "SIZE ERROR" +%token EXCEPTION_NAME "EXCEPTION NAME" +%token LEVEL LEVEL66 "66" LEVEL78 "78" LEVEL88 "88" +%token CLASS_NAME "class name" + NAME + NAME88 "Level 88 NAME" + NUME "Name" + NUMED "NUMERIC-EDITED picture" + NUMED_CR "NUMERIC-EDITED CR picture" + NUMED_DB "NUMERIC-EDITED DB picture" +%token NINEDOT NINES NINEV PIC_P +%token SPACES +%token LITERAL +%token END EOP +%token FILENAME +%token INVALID +%token NUMBER NEGATIVE +%token NUMSTR "numeric literal" +%token OVERFLOW +%token COMPUTATIONAL + +%token PERFORM BACKWARD +%token POSITIVE +%token POINTER +%token SECTION +%token STANDARD_ALPHABET "STANDARD ALPHABET" +%token SWITCH +%token UPSI +%token ZERO + + /* environment names */ +%token SYSIN SYSIPT SYSOUT SYSLIST SYSLST SYSPUNCH SYSPCH CONSOLE +%token C01 C02 C03 C04 C05 C06 C07 C08 C09 C10 C11 C12 CSP +%token S01 S02 S03 S04 S05 AFP_5A "AFP 5A" +%token STDIN STDOUT STDERR + + /* intrinsics */ +%token LIST MAP NOLIST NOMAP NOSOURCE +%token MIGHT_BE "IS or IS NOT" + FUNCTION_UDF "UDF name" + FUNCTION_UDF_0 "UDF" + +%token DATE_FMT "date format" + TIME_FMT "time format" + DATETIME_FMT "datetime format" + + /* tokens without semantic value */ + /* CDF (COPY and >> defined here but used in cdf.y) */ +%token BASIS CBL CONSTANT COPY + DEFINED ENTER FEATURE INSERTT + LSUB "(" + PARAMETER_kw "PARAMETER" + OVERRIDE READY RESET + RSUB ")" + SERVICE_RELOAD "SERVICE RELOAD" STAR_CBL "*CBL" + SUBSCRIPT SUPPRESS TITLE TRACE USE + + COBOL_WORDS ">>COBOL-WORDS" EQUATE UNDEFINE + CDF_DEFINE ">>DEFINE" CDF_DISPLAY ">>DISPLAY" + CDF_IF ">>IF" CDF_ELSE ">>ELSE" CDF_END_IF ">>END-IF" + CDF_EVALUATE ">>EVALUATE" + CDF_WHEN ">>WHEN" + CDF_END_EVALUATE ">>END-EVALUATE" + CALL_COBOL "CALL" CALL_VERBATIM "CALL (as C)" + + IF THEN ELSE + SENTENCE + ACCEPT ADD ALTER CALL CANCEL CLOSE COMPUTE CONTINUE + DELETE DISPLAY DIVIDE EVALUATE EXIT FILLER_kw "FILLER" + GOBACK GOTO + INITIALIZE INSPECT + MERGE MOVE MULTIPLY OPEN PARAGRAPH + READ RELEASE RETURN REWRITE + SEARCH SET SELECT SORT SORT_MERGE "SORT-MERGE" + STRING_kw "STRING" STOP SUBTRACT START + UNSTRING WRITE WHEN + + ABS ACCESS ACOS ACTUAL ADVANCING AFTER ALL + ALLOCATE + ALPHABET ALPHABETIC ALPHABETIC_LOWER "ALPHABETIC-LOWER" + ALPHABETIC_UPPER "ALPHABETIC-UPPER" + ALPHANUMERIC + ALPHANUMERIC_EDITED "ALPHANUMERIC-EDITED" + ALSO ALTERNATE ANNUITY ANUM ANY ANYCASE APPLY ARE + AREA AREAS AS + ASCENDING ACTIVATING ASIN ASSIGN AT ATAN + + BASED BASECONVERT + BEFORE BINARY BIT BIT_OF "BIT-OF" BIT_TO_CHAR "BIT-TO-CHAR" + BLANK BLOCK + BOOLEAN_OF_INTEGER "BOOLEAN-OF-INTEGER" + BOTTOM BY + BYTE BYTE_LENGTH "BYTE-LENGTH" + + CF CH + CHANGED CHAR CHAR_NATIONAL "CHAR-NATIONAL" + CHARACTER CHARACTERS CHECKING CLASS + COBOL CODE CODESET COLLATING + COLUMN COMBINED_DATETIME "COMBINED-DATETIME" + COMMA COMMAND_LINE "COMMAND-LINE" + COMMAND_LINE_COUNT "COMMAND-LINE-COUNT" + COMMIT COMMON + + CONCAT CONDITION CONFIGURATION_SECT "CONFIGURATION SECTION" + CONTAINS + CONTENT CONTROL CONTROLS CONVERT CONVERTING CORRESPONDING COS + COUNT CURRENCY CURRENT CURRENT_DATE + + DATA DATE DATE_COMPILED + DATE_OF_INTEGER "DATE-OF-INTEGER" + DATE_TO_YYYYMMDD "DATE-TO-YYYYMMDD" + DATE_WRITTEN "DATE-WRITTEN" + DAY DAY_OF_INTEGER "DAY-OF-INTEGER" + DAY_OF_WEEK "DAY-OF-WEEK" + DAY_TO_YYYYDDD "DAY-TO-YYYYDDD" + DBCS DE DEBUGGING DECIMAL_POINT + DECLARATIVES DEFAULT DELIMITED DELIMITER DEPENDING + DESCENDING DETAIL DIRECT + DIRECT_ACCESS "DIRECT-ACCESS" + DOWN DUPLICATES + DYNAMIC + + E EBCDIC EC EGCS ENTRY ENVIRONMENT EQUAL EVERY + EXAMINE EXHIBIT EXP EXP10 EXTEND EXTERNAL + + EXCEPTION_FILE "EXCEPTION-FILE" + EXCEPTION_FILE_N "EXCEPTION-FILE-N" + EXCEPTION_LOCATION "EXCEPTION-LOCATION" + EXCEPTION_LOCATION_N "EXCEPTION-LOCATION-N" + EXCEPTION_STATEMENT "EXCEPTION-STATEMENT" + EXCEPTION_STATUS "EXCEPTION-STATUS" + + FACTORIAL FALSE_kw "False" FD + FILE_CONTROL "FILE-CONTROL" + FILE_KW "File" + FILE_LIMIT "FILE-LIMIT" + FINAL FINALLY + FIND_STRING "FIND-STRING" + FIRST FIXED FOOTING FOR + FORMATTED_CURRENT_DATE "FORMATTED-CURRENT-DATE" + FORMATTED_DATE "FORMATTED-DATE" + FORMATTED_DATETIME "FORMATTED-DATETIME" + FORMATTED_TIME "FORMATTED-TIME" + FORM_OVERFLOW "FORM-OVERFLOW" + FREE + FRACTION_PART "FRACTION-PART" + FROM FUNCTION + + GENERATE GIVING GLOBAL GO GROUP + + HEADING HEX + HEX_OF "HEX-OF" + HEX_TO_CHAR "HEX-TO-CHAR" + HIGH_VALUES "HIGH-VALUES" + HIGHEST_ALGEBRAIC "HIGHEST-ALGEBRAIC" + HOLD + + IBM_360 IN INCLUDE INDEX INDEXED INDICATE INITIAL_kw "INITIAL" + INITIATE INPUT INSTALLATION INTERFACE + INTEGER + INTEGER_OF_BOOLEAN "INTEGER-OF-BOOLEAN" + INTEGER_OF_DATE "INTEGER-OF-DATE" + INTEGER_OF_DAY "INTEGER-OF-DAY" + INTEGER_OF_FORMATTED_DATE "INTEGER-OF-FORMATTED-DATE" + INTEGER_PART "INTEGER-PART" + INTO INTRINSIC INVOKE IO IO_CONTROL "IO-CONTROL" + IS ISNT "IS NOT" + + KANJI KEY + + LABEL LAST LEADING LEFT LENGTH + LENGTH_OF "LENGTH-OF" + LIMIT LIMITS LINE LINES + LINE_COUNTER "LINE-COUNTER" + LINAGE LINKAGE LOCALE LOCALE_COMPARE "LOCALE-COMPARE" + LOCALE_DATE "LOCALE-DATE" + LOCALE_TIME "LOCALE-TIME" + LOCALE_TIME_FROM_SECONDS "LOCALE-TIME-FROM-SECONDS" + LOCAL_STORAGE "LOCAL-STORAGE" + LOCATION + LOCK LOCK_ON LOG LOG10 + LOWER_CASE "LOWER-CASE" + LOW_VALUES "LOW-VALUES" + LOWEST_ALGEBRAIC "LOWEST-ALGEBRAIC" + LPAREN " )" + + MANUAL MAXX "Max" MEAN MEDIAN MIDRANGE + MINN "Min" MULTIPLE MOD MODE + MODULE_NAME "MODULE-NAME " + + NAMED NAT NATIONAL + NATIONAL_EDITED "NATIONAL-EDITED" + NATIONAL_OF "NATIONAL-OF" + NATIVE NESTED NEXT + NO NOTE + NULLS NULLPTR + NUMERIC + NUMERIC_EDITED NUMVAL + NUMVAL_C "NUMVAL-C" + NUMVAL_F "NUMVAL-F" + + OCCURS OF OFF OMITTED ON ONLY OPTIONAL OPTIONS ORD ORDER + ORD_MAX "ORD-MAX" + ORD_MIN "ORD-MIN" + ORGANIZATION OTHER OTHERWISE OUTPUT + + PACKED_DECIMAL PADDING PAGE + PAGE_COUNTER "PAGE-COUNTER" + PF PH PI PIC PICTURE + PLUS PRESENT_VALUE PRINT_SWITCH + PROCEDURE PROCEDURES PROCEED PROCESS + PROGRAM_ID "PROGRAM-ID" + PROGRAM_kw "Program" PROPERTY PROTOTYPE PSEUDOTEXT + + QUOTES "QUOTE" + + RANDOM RANDOM_SEED RANGE RAISE RAISING + RD RECORD RECORDING RECORDS RECURSIVE + REDEFINES REEL REFERENCE RELATIVE REM REMAINDER REMARKS + REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING REPORTS + REPOSITORY RERUN RESERVE RESTRICTED RESUME + REVERSE REVERSED REWIND RF RH RIGHT ROUNDED RUN + + SAME SCREEN SD + SECONDS_FROM_FORMATTED_TIME "SECONDS-FROM-FORMATTED-TIME" + SECONDS_PAST_MIDNIGHT "SECONDS-PAST-MIDNIGHT" + SECURITY + SEPARATE SEQUENCE SEQUENTIAL SHARING + SIMPLE_EXIT "(simple) EXIT" + SIGN SIN SIZE + SMALLEST_ALGEBRAIC "SMALLEST-ALGEBRAIC" + SOURCE + SOURCE_COMPUTER "SOURCE-COMPUTER" + SPECIAL_NAMES SQRT STACK + STANDARD + STANDARD_1 "STANDARD-1" + STANDARD_DEVIATION "STANDARD-DEVIATION " + STANDARD_COMPARE "STANDARD-COMPARE" + STATUS STRONG + SUBSTITUTE SUM SYMBOL SYMBOLIC SYNCHRONIZED + + TALLY TALLYING TAN TERMINATE TEST + TEST_DATE_YYYYMMDD "TEST-DATE-YYYYMMDD" + TEST_DAY_YYYYDDD "TEST-DAY-YYYYDDD" + TEST_FORMATTED_DATETIME "TEST-FORMATTED-DATETIME" + TEST_NUMVAL "TEST-NUMVAL" + TEST_NUMVAL_C "TEST-NUMVAL-C" + TEST_NUMVAL_F "TEST-NUMVAL-F" + THAN TIME TIMES + TO TOP + TOP_LEVEL + TRACKS TRACK_AREA TRAILING TRANSFORM TRIM TRUE_kw "True" TRY + TURN TYPE TYPEDEF + + ULENGTH UNBOUNDED UNIT UNITS UNIT_RECORD UNTIL UP UPON + UPOS UPPER_CASE USAGE USING USUBSTR USUPPLEMENTARY + UTILITY UUID4 UVALID UWIDTH + + VALUE VARIANCE VARYING VOLATILE + + WHEN_COMPILED WITH WORKING_STORAGE + XML XMLGENERATE XMLPARSE + YEAR_TO_YYYY YYYYDDD YYYYMMDD + + /* unused Context Words */ + ARITHMETIC ATTRIBUTE AUTO AUTOMATIC + AWAY_FROM_ZERO "AWAY-FROM-ZERO" + BACKGROUND_COLOR "BACKGROUND-COLOR" + BELL + BINARY_ENCODING "BINARY-ENCODING" + BLINK + CAPACITY CENTER CLASSIFICATION CYCLE + DECIMAL_ENCODING "DECIMAL-ENCODING" + ENTRY_CONVENTION EOL EOS ERASE EXPANDS + FLOAT_BINARY "FLOAT-BINARY" + FLOAT_DECIMAL "FLOAT-DECIMAL" + FOREGROUND_COLOR FOREVER FULL + HIGHLIGHT + HIGH_ORDER_LEFT "HIGH-ORDER-LEFT" + HIGH_ORDER_RIGHT "HIGH-ORDER-RIGHT" + IGNORING IMPLEMENTS INITIALIZED INTERMEDIATE + LC_ALL_kw "LC-ALL" + LC_COLLATE_kw "LC-COLLATE" + LC_CTYPE_kw "LC-CTYPE" + LC_MESSAGES_kw "LC-MESSAGES" + LC_MONETARY_kw "LC-MONETARY" + LC_NUMERIC_kw "LC-NUMERIC" + LC_TIME_kw "LC-TIME" + LOWLIGHT + NEAREST_AWAY_FROM_ZERO "NEAREST-AWAY-FROM-ZERO" + NEAREST_EVEN NEAREST_TOWARD_ZERO "NEAREST-EVEN NEAREST-TOWARD-ZERO" + NONE NORMAL NUMBERS + PREFIXED PREVIOUS PROHIBITED RELATION REQUIRED + REVERSE_VIDEO ROUNDING + SECONDS SECURE SHORT SIGNED + STANDARD_BINARY "STANDARD-BINARY" + STANDARD_DECIMAL "STANDARD-DECIMAL" + STATEMENT STEP STRUCTURE + TOWARD_GREATER "TOWARD-GREATER" + TOWARD_LESSER "TOWARD-LESSER" + TRUNCATION + UCS_4 "UCS-4" + UNDERLINE UNSIGNED + UTF_16 "UTF-16" + UTF_8 "UTF-8" + + ADDRESS + END_ACCEPT "END-ACCEPT" + END_ADD "END-ADD" + END_CALL "END-CALL" + END_COMPUTE "END-COMPUTE" + END_DELETE "END-DELETE" + END_DISPLAY "END-DISPLAY" + END_DIVIDE "END-DIVIDE" + END_EVALUATE "END-EVALUATE" + END_MULTIPLY "END-MULTIPLY" + END_PERFORM "END-PERFORM" + END_READ "END-READ" + END_RETURN "END-RETURN" + END_REWRITE "END-REWRITE" + END_SEARCH "END-SEARCH" + END_START "END-START" + END_STRING "END-STRING" + END_SUBTRACT "END-SUBTRACT" + END_UNSTRING "END-UNSTRING" + END_WRITE "END-WRITE" + END_IF "END-IF" + /* end tokens without semantic value */ + + // YYEOF added for compatibility with Bison 3.5 + // https://savannah.gnu.org/forum/forum.php?forum_id=9735 +%token YYEOF 0 "end of file" + +%type sentence statements statement +%type star_cbl_opt close_how + +%type test_before usage_clause1 might_be +%type all optional sign_leading on_off initialized strong +%type count data_clauses data_clause +%type nine nines nps relop spaces_etc reserved_value signed +%type variable_type +%type true_false posneg eval_posneg +%type open_io alphabet_etc +%type device_name +%type numed collating_sequence context_word ctx_name locale_spec +%type namestr alphabet_lit program_as repo_as +%type perform_cond kind_of_name +%type alloc_ret + +%type log_term rel_expr rel_abbr eval_abbr +%type num_value num_term value factor +%type simple_cond bool_expr +%type log_expr rel_abbrs eval_abbrs +%type rel_term rel_term1 + +%type value78 +%type literal name nume typename +%type num_literal signed_literal + +%type perform_start +%type perform_times +%type perform_verb + perform_inline perform_except + +%type eval_subject1 +%type vargs disp_vargs; +%type level_name +%type fd_name picture_sym name66 paragraph_name +%type literalism +%type bound advance_when org_clause1 read_next +%type access_mode multiple lock_how lock_mode +%type select_clauses +%type select_clause access_clause alt_key_clause + assign_clause collate_clause status_clause + lock_mode_clause org_clause padding_clause + record_delim_clause record_key_clause + relative_key_clause reserve_clause sharing_clause + +%type filename read_body write_body delete_body +%type rewrite_body +%type record_vary rec_contains from_to record_desc +%type read_file rewrite1 write_file +%type data_descr data_descr1 write_what file_record +%type name88 +%type advancing advance_by +%type alphaval alpha_val numeref scalar scalar88 +%type tableref tableish +%type varg varg1 varg1a +%type expr expr_term compute_expr free_tgt by_value_arg +%type move_tgt selected_name read_key read_into vary_by +%type accept_refer num_operand envar search_expr any_arg +%type accept_body +%type expr_list subscripts arg_list free_tgts +%type move_tgts set_tgts +%type search_varying +%type search_term search_terms +%type