gcc/libgcobol/gmath.cc
Iain Sandoe 95f10974a9 libgcobol: Allow libgcobol to use libquadmath [PR119244].
Many of the changes are mechanical:
 1. 'GCOB_FP128' in place of _Float128.
 2. Using FP128_FUNC to represent the spelling of intrinsics.
 3. Using GCOB_FP128_LITERAL() to choose the suffix for literals.

This allows for:
  __float128 and 'q' as the suffix when libquadmath is configured.
   _Float128 / 'f128' when IEC-60559 is available in libc
 long double / 'l' when long double is ieee753 128b.

Add libquadmath to libgcobol.spec and its dependencies  where the
platform needs it.

	PR cobol/119244

libgcobol/ChangeLog:

	* Makefile.am: Add support for libquadmath.
	* Makefile.in: Regenerate.
	* acinclude.m4: Add support for libquadmath.
	* config.h.in: Regenerate.
	* configure: Regenerate.
	* configure.ac: Configure libquadmath support.
	* gmath.cc: Use GCOB_FP128 to represent the configured
	128b floating point type.  Use FP128_FUNC to represent
	the naming of intrinsics in the configure 128b floating
	point type. Render literals with GCOB_FP128_LITERAL.
	* intrinsic.cc: Likewise.
	* libgcobol.cc: Likewise.
	* libgcobol.h: Likewise.
	* libgcobol-fp.h: New file.
	* gfileio.cc: Include libgcobol-fp.h.
	* libgcobol.spec.in: Add libquadmath configure output.

Signed-off-by: Iain Sandoe <iain@sandoe.co.uk>
2025-04-10 11:05:07 +01:00

2168 lines
66 KiB
C++

/*
* Copyright (c) 2021-2025 Symas Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of the Symas Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <math.h>
#include <fenv.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <algorithm>
#include "config.h"
#include "libgcobol-fp.h"
#include "ec.h"
#include "common-defs.h"
#include "io.h"
#include "gcobolio.h"
#include "libgcobol.h"
#include "common-defs.h"
#include "gmath.h"
#include "gcobolio.h"
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#define MAX_INTERMEDIATE_BITS 126
#define MAX_INTERMEDIATE_DECIMALS 16
static int
conditional_stash( cblc_field_t *destination,
size_t destination_o,
size_t destination_s,
bool on_error_flag,
__int128 value,
int rdigits,
cbl_round_t rounded)
{
int retval = compute_error_none;
if( !on_error_flag )
{
// It's an uncomplicated assignment, because there was no
// ON SIZE ERROR phrase
__gg__int128_to_qualified_field(destination,
destination_o,
destination_s,
value,
rdigits,
rounded,
&retval);
}
else
{
// This is slightly more complex, because in the event of a
// SIZE ERROR. we need to leave the original value untouched
unsigned char *stash = (unsigned char *)malloc(destination_s);
memcpy(stash, destination->data+destination_o, destination_s);
__gg__int128_to_qualified_field(destination,
destination_o,
destination_s,
value,
rdigits,
rounded,
&retval);
if( retval )
{
// Because there was a size error, we will report that
// upon return, and we need to put back the original value:
memcpy(destination->data+destination_o, stash, destination_s);
}
free(stash);
}
return retval;
}
static int
conditional_stash( cblc_field_t *destination,
size_t destination_o,
size_t destination_s,
bool on_error_flag,
GCOB_FP128 value,
cbl_round_t rounded)
{
int retval = compute_error_none;
if( !on_error_flag )
{
// It's an uncomplicated assignment, because there was no
// ON SIZE ERROR phrase
__gg__float128_to_qualified_field(destination,
destination_o,
value,
rounded,
&retval);
}
else
{
// This is slightly more complex, because in the event of a
// SIZE ERROR. we need to leave the original value untouched
unsigned char *stash = (unsigned char *)malloc(destination_s);
memcpy(stash, destination->data+destination_o, destination_s);
__gg__float128_to_qualified_field(destination,
destination_o,
value,
rounded,
&retval);
if( retval )
{
// Because there was a size error, we will report that
// upon return, and we need to put back the original value:
memcpy(destination->data+destination_o, stash, destination_s);
}
free(stash);
}
return retval;
}
static
GCOB_FP128
divide_helper_float(GCOB_FP128 a_value,
GCOB_FP128 b_value,
int *compute_error)
{
if( b_value == 0 )
{
// Can't divide by zero
*compute_error |= compute_error_divide_by_zero;
return a_value;
}
// Do the actual division, giving us 0.399999999999999999999999999999999971
a_value /= b_value;
if( __builtin_isinf(a_value) )
{
*compute_error |= compute_error_overflow;
return 0;
}
if( __builtin_isnan(a_value) )
{
*compute_error |= compute_error_underflow;
return 0;
}
return a_value;
}
static
GCOB_FP128
multiply_helper_float(GCOB_FP128 a_value,
GCOB_FP128 b_value,
int *compute_error)
{
a_value *= b_value;
if( __builtin_isinf(a_value) )
{
*compute_error |= compute_error_overflow;
return 0;
}
if( __builtin_isnan(a_value) )
{
*compute_error |= compute_error_underflow;
return 0;
}
return a_value;
}
static
GCOB_FP128
addition_helper_float(GCOB_FP128 a_value,
GCOB_FP128 b_value,
int *compute_error)
{
a_value += b_value;
if( __builtin_isinf(a_value) )
{
*compute_error |= compute_error_overflow;
return 0;
}
if( __builtin_isnan(a_value) )
{
*compute_error |= compute_error_underflow;
return 0;
}
return a_value;
}
static
GCOB_FP128
subtraction_helper_float(GCOB_FP128 a_value,
GCOB_FP128 b_value,
int *compute_error)
{
a_value -= b_value;
if( __builtin_isinf(a_value) )
{
*compute_error |= compute_error_overflow;
return 0;
}
if( __builtin_isnan(a_value) )
{
*compute_error |= compute_error_underflow;
return 0;
}
return a_value;
}
extern "C"
void
__gg__pow( cbl_arith_format_t,
size_t,
size_t,
size_t,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
cblc_field_t **A = __gg__treeplet_1f;
size_t *A_o = __gg__treeplet_1o;
size_t *A_s = __gg__treeplet_1s;
cblc_field_t **B = __gg__treeplet_2f;
size_t *B_o = __gg__treeplet_2o;
size_t *B_s = __gg__treeplet_2s;
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
GCOB_FP128 avalue = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
GCOB_FP128 bvalue = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
GCOB_FP128 tgt_value;
if( avalue == 0 && bvalue == 0 )
{
*compute_error |= compute_error_exp_zero_by_zero;
tgt_value = 1;
}
else if(avalue == 0 && bvalue < 0 )
{
*compute_error |= compute_error_exp_zero_by_minus;
tgt_value = 0;
}
else
{
// Calculate our answer, in floating point:
errno = 0;
feclearexcept(FE_ALL_EXCEPT);
tgt_value = FP128_FUNC(pow)(avalue, bvalue);
if( errno || fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW) )
{
// One of a large number of errors took place. See math_error(7) and
// pow(3). Let's just use this last error as a grab-bag; I didn't
// care to go down the rabbit hole of figuring out if a floating point
// number did or did not have a fractional part. That way lies
// madness.
*compute_error |= compute_error_exp_minus_by_frac;
// This kind of error doesn't overwrite the target, so the returned
// value is not relevant. Make it zero to avoid overheating the
// converstion routine
tgt_value = 0;
}
}
if( !(*compute_error & compute_error_exp_minus_by_frac) )
{
*compute_error |= conditional_stash(C[0],
C_o[0],
C_s[0],
(on_error_flag & ON_SIZE_ERROR),
tgt_value,
*rounded);
}
}
extern "C"
void
__gg__process_compute_error(int compute_error)
{
// This routine gets called after a series of parser_op operations is
// complete (see parser_assign()) when the source code didn't specify
// an ON SIZE ERROR clause.
if( compute_error & compute_error_divide_by_zero)
{
exception_raise(ec_size_zero_divide_e);
}
else if( compute_error & compute_error_truncate )
{
exception_raise(ec_size_truncation_e);
}
else if( compute_error & ( compute_error_exp_zero_by_zero
| compute_error_exp_zero_by_minus
| compute_error_exp_minus_by_frac ) )
{
exception_raise(ec_size_exponentiation_e);
}
else if( compute_error & compute_error_overflow )
{
exception_raise(ec_size_overflow_e);
}
else if( compute_error & compute_error_underflow )
{
exception_raise(ec_size_underflow_e);
}
}
typedef unsigned __int128 uint128;
typedef struct int256
{
union
{
unsigned char data[32];
uint64_t i64 [4];
uint128 i128[2];
};
}int256;
static int
multiply_int256_by_int64(int256 &product, const uint64_t multiplier)
{
// Typical use of this routine is multiplying a temporary value by
// a factor of ten. This is effectively left-shifting by decimal
// digits. See scale_int256_by_digits
uint64_t overflows[5] = {};
for(int i=0; i<4; i++)
{
uint128 temp = (uint128)product.i64[i] * multiplier;
product.i64[i] = *(uint64_t *)(&temp);
overflows[i+1] = *(uint64_t *)((uint8_t *)(&temp) + 8);
}
for(int i=1; i<4; i++)
{
product.i64[i] += overflows[i];
if(product.i64[i] < overflows[i])
{
overflows[i+1] += 1;
}
}
// Indicate that an overflow took place. This is not useful unless the int256
// is known to be positive.
return overflows[4];
}
static int
add_int256_to_int256(int256 &sum, const int256 addend)
{
uint128 overflows[3] = {};
for(int i=0; i<2; i++)
{
sum.i128[i] += addend.i128[i];
if( sum.i128[i] < addend.i128[i] )
{
overflows[i+1] = 1;
}
}
if( overflows[1] )
{
sum.i128[1] += overflows[1];
if( sum.i128[1] == 0 )
{
overflows[2] = 1;
}
}
// Indicate that an overflow took place. This is not useful unless the two
// values are known to be positive.
return (int)overflows[2];
}
static void
negate_int256(int256 &val)
{
val.i128[0] = ~val.i128[0];
val.i128[1] = ~val.i128[1];
val.i128[0] += 1;
if( !val.i128[0] )
{
val.i128[1] += 1;
}
}
static int
subtract_int256_from_int256(int256 &difference, int256 subtrahend)
{
negate_int256(subtrahend);
return add_int256_to_int256(difference, subtrahend);
}
static void
scale_int256_by_digits(int256 &val, int digits)
{
uint64_t pot;
while(digits > 17)
{
pot = (uint64_t)__gg__power_of_ten(17);
multiply_int256_by_int64(val, pot);
digits -= 17;
}
pot = (uint64_t)__gg__power_of_ten(digits);
multiply_int256_by_int64(val, pot);
}
static void
divide_int256_by_int64(int256 &val, uint64_t divisor)
{
// val needs to be a positive number
uint128 temp = 0;
for( int i=3; i>=0; i-- )
{
// Left shift temp 64 bits:
*(uint64_t *)(((uint8_t *)&temp)+8) = *(uint64_t *)(((uint8_t *)&temp)+0);
// Put the high digit of val into the bottom of temp
*(uint64_t *)(((uint8_t *)&temp)+0) = val.i64[i];
// Divide that combinary by divisor to get the new digits
val.i64[i] = temp / divisor;
// And the new temp is that combination modulo divisor
temp = temp % divisor;
}
}
static int
squeeze_int256(int256 &val, int &rdigits)
{
int overflow = 0;
// It has been decreed that at this juncture the result must fit into
// MAX_FIXED_POINT_DIGITS. If the result does not, we have an OVERFLOW error.
int is_negative = val.data[31] & 0x80;
if( is_negative )
{
negate_int256(val);
}
// As long as there are some decimal places left, we hold our nose and right-
// shift a too-large value rightward by decimal digits. In other words, we
// truncate the fractional part to make room for the integer part:
while(rdigits > 0 && val.i128[1] )
{
divide_int256_by_int64(val, 10UL);
rdigits -= 1;
}
// At this point, to be useful, val has to have fewer than 128 bits:
if( val.i128[1] )
{
overflow = compute_error_overflow;
}
else
{
// We know that it has fewer than 128 bits. But the remaining 128 bits need
// to be less than 10^MAX_FIXED_POINT_DIGITS. This gets a bit nasty here,
// since at this writing the gcc compiler doesn't understand 128-bit
// constants. So, we are forced into some annoying compiler gymnastics.
#if MAX_FIXED_POINT_DIGITS != 37
#error MAX_FIXED_POINT_DIGITS needs to be 37
#endif
// These sixteen bytes comprise the binary value of 10^38
static const uint8_t C1038[] = {0x00, 0x00, 0x00, 0x00, 0x40, 0x22, 0x8a, 0x09,
0x7a, 0xc4, 0x86, 0x5a, 0xa8, 0x4c, 0x3b, 0x4b};
static const uint128 biggest = *(uint128 *)C1038;
// If we still have some rdigits to throw away, we can keep shrinking
// the value:
while(rdigits > 0 && val.i128[0] >= biggest )
{
divide_int256_by_int64(val, 10UL);
rdigits -= 1;
}
// And we have to make sure that rdigits isn't too big
while(rdigits > MAX_FIXED_POINT_DIGITS)
{
divide_int256_by_int64(val, 10UL);
rdigits -= 1;
}
if( val.i128[0] >= biggest )
{
overflow = compute_error_overflow;
}
}
if( is_negative )
{
negate_int256(val);
}
return overflow;
}
static void
get_int256_from_qualified_field(int256 &var,
int &rdigits,
cblc_field_t *field,
size_t field_o,
size_t field_s)
{
var.i128[0] = (uint128)__gg__binary_value_from_qualified_field( &rdigits,
field,
field_o,
field_s);
if( var.data[15] & 0x80 )
{
// This value is negative, so extend the sign bit:
memset(var.data + 16, 0xFF, 16);
}
else
{
// This value is positive
memset(var.data + 16, 0x00, 16);
}
}
static int256 phase1_result;
static int phase1_rdigits;
static GCOB_FP128 phase1_result_float;
extern "C"
void
__gg__add_fixed_phase1( cbl_arith_format_t ,
size_t nA,
size_t ,
size_t ,
cbl_round_t *,
int ,
int *compute_error
)
{
// Our job is to add together the nA fixed-point values in the A[] array
// The result goes into the temporary phase1_result.
cblc_field_t **A = __gg__treeplet_1f;
size_t *A_o = __gg__treeplet_1o;
size_t *A_s = __gg__treeplet_1s;
// Let us prime the pump with the first value of A[]
get_int256_from_qualified_field(phase1_result, phase1_rdigits, A[0], A_o[0], A_s[0]);
// We now go into a loop adding each of the A[] values to phase1_result:
for( size_t i=1; i<nA; i++ )
{
int temp_rdigits;
int256 temp = {};
get_int256_from_qualified_field(temp, temp_rdigits, A[i], A_o[i], A_s[i]);
// We have to scale the one with fewer rdigits to match the one with greater
// rdigits:
if( phase1_rdigits > temp_rdigits )
{
scale_int256_by_digits(temp, phase1_rdigits - temp_rdigits);
temp_rdigits = phase1_rdigits;
}
else if( phase1_rdigits < temp_rdigits )
{
scale_int256_by_digits(phase1_result, temp_rdigits - phase1_rdigits);
phase1_rdigits = temp_rdigits;
}
// The two numbers have the same number of rdigits. It's now safe to add
// them.
add_int256_to_int256(phase1_result, temp);
}
// phase1_result/phase1_rdigits now reflect the sum of all A[]
int overflow = squeeze_int256(phase1_result, phase1_rdigits);
if( overflow )
{
*compute_error |= compute_error_overflow;
}
}
extern "C"
void
__gg__addf1_fixed_phase2( cbl_arith_format_t ,
size_t ,
size_t ,
size_t ,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
// This is the assignment phase of an ADD Format 1
// We take phase1_result and accumulate it into C
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
if( C[0]->type == FldFloat)
{
// The target we need to accumulate into is a floating-point number, so we
// need to convert our fixed-point intermediate into floating point and
// proceed accordingly.
// Convert the intermediate
GCOB_FP128 value_a = (GCOB_FP128)phase1_result.i128[0];
value_a /= __gg__power_of_ten(phase1_rdigits);
// Pick up the target
GCOB_FP128 value_b = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
value_a += value_b;
// At this point, we assign running_sum to *C.
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
value_a,
*rounded++);
}
else
{
// We have a fixed-point intermediate, and we are accumulating intoi a
// fixed point target.
int256 value_a = phase1_result;
int rdigits_a = phase1_rdigits;
int256 value_b = {};
int rdigits_b;
get_int256_from_qualified_field(value_b, rdigits_b, C[0], C_o[0], C_s[0]);
// We have to scale the one with fewer rdigits to match the one with greater
// rdigits:
if( rdigits_a > rdigits_b )
{
scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
rdigits_b = rdigits_a;
}
else if( rdigits_a < rdigits_b )
{
scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
rdigits_a = rdigits_b;
}
// The two numbers have the same number of rdigits. It's now safe to add
// them.
add_int256_to_int256(value_a, value_b);
int overflow = squeeze_int256(value_a, rdigits_a);
if( overflow )
{
*compute_error |= compute_error_overflow;
}
// At this point, we assign running_sum to *C.
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
value_a.i128[0],
rdigits_a,
*rounded++);
}
}
extern "C"
void
__gg__fixed_phase2_assign_to_c( cbl_arith_format_t ,
size_t ,
size_t ,
size_t ,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
// This is the assignment phase of an ADD Format 2
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
// We take phase1_result and put it into C
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
if( C[0]->type == FldFloat)
{
// The target we need to accumulate into is a floating-point number, so we
// need to convert our fixed-point intermediate into floating point and
// proceed accordingly.
// Convert the intermediate
GCOB_FP128 value_a = (GCOB_FP128)phase1_result.i128[0];
value_a /= __gg__power_of_ten(phase1_rdigits);
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
value_a,
*rounded++);
}
else
{
// We have a fixed-point intermediate, and we are accumulating intoi a
// fixed point target.
int256 value_a = phase1_result;
int rdigits_a = phase1_rdigits;
int overflow = squeeze_int256(value_a, rdigits_a);
if( overflow )
{
*compute_error |= compute_error_overflow;
}
// At this point, we assign that value to *C.
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
value_a.i128[0],
rdigits_a,
*rounded++);
}
}
extern "C"
void
__gg__add_float_phase1( cbl_arith_format_t ,
size_t nA,
size_t ,
size_t ,
cbl_round_t *,
int ,
int *compute_error
)
{
// Our job is to add together the nA floating-point values in the A[] array
// The result goes into the temporary phase1_result_ffloat.
cblc_field_t **A = __gg__treeplet_1f;
size_t *A_o = __gg__treeplet_1o;
size_t *A_s = __gg__treeplet_1s;
// Let us prime the pump with the first value of A[]
phase1_result_float = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
// We now go into a loop adding each of the A[] values to phase1_result_flt:
for( size_t i=1; i<nA; i++ )
{
GCOB_FP128 temp = __gg__float128_from_qualified_field(A[i], A_o[i], A_s[i]);
phase1_result_float = addition_helper_float(phase1_result_float,
temp,
compute_error);
}
}
extern "C"
void
__gg__addf1_float_phase2( cbl_arith_format_t ,
size_t ,
size_t ,
size_t ,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
// This is the assignment phase of an ADD Format 2
// We take phase1_result and accumulate it into C
GCOB_FP128 temp = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
temp = addition_helper_float(temp, phase1_result_float, compute_error);
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
temp,
*rounded++);
}
extern "C"
void
__gg__float_phase2_assign_to_c( cbl_arith_format_t ,
size_t ,
size_t ,
size_t ,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
// This is the assignment phase of an ADD Format 2
// We take phase1_result and put it into C
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
phase1_result_float,
*rounded++);
}
extern "C"
void
__gg__addf3(cbl_arith_format_t ,
size_t nA,
size_t ,
size_t ,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
// This is an ADD Format 3. Each A[i] gets accumulated into each C[i]. When
// both are fixed, we do fixed arithmetic. When either is a FldFloat, we
// do floating-point arithmetic.
cblc_field_t **A = __gg__treeplet_1f;
size_t *A_o = __gg__treeplet_1o;
size_t *A_s = __gg__treeplet_1s;
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
for(size_t i=0; i<nA; i++)
{
if( A[i]->type == FldFloat || C[i]->type == FldFloat )
{
GCOB_FP128 value_a = __gg__float128_from_qualified_field(A[i], A_o[i], A_s[i]);
GCOB_FP128 value_b = __gg__float128_from_qualified_field(C[i], C_o[i], C_s[i]);
value_a = addition_helper_float(value_a, value_b, compute_error);
// At this point, we assign the sum to *C.
*compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
on_size_error,
value_a,
*rounded++);
}
else
{
// We have are doing fixed-point arithmetic.
int256 value_a;
int rdigits_a;
int256 value_b;
int rdigits_b;
get_int256_from_qualified_field(value_a, rdigits_a, A[i], A_o[i], A_s[i]);
get_int256_from_qualified_field(value_b, rdigits_b, C[i], C_o[i], C_s[i]);
// We have to scale the one with fewer rdigits to match the one with greater
// rdigits:
if( rdigits_a > rdigits_b )
{
scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
rdigits_b = rdigits_a;
}
else if( rdigits_a < rdigits_b )
{
scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
rdigits_a = rdigits_b;
}
// The two numbers have the same number of rdigits. It's now safe to add
// them.
add_int256_to_int256(value_a, value_b);
int overflow = squeeze_int256(value_a, rdigits_a);
if( overflow )
{
*compute_error |= compute_error_overflow;
}
// At this point, we assign the sum to *C.
*compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
on_size_error,
value_a.i128[0],
rdigits_a,
*rounded++);
}
}
}
extern "C"
void
__gg__subtractf1_fixed_phase2(cbl_arith_format_t ,
size_t ,
size_t ,
size_t ,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
// This is the assignment phase of an ADD Format 1
// We take phase1_result and subtrace it from C
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
if( C[0]->type == FldFloat)
{
// The target we need to accumulate into is a floating-point number, so we
// need to convert our fixed-point intermediate into floating point and
// proceed accordingly.
// Convert the intermediate
GCOB_FP128 value_a = (GCOB_FP128)phase1_result.i128[0];
value_a /= __gg__power_of_ten(phase1_rdigits);
// Pick up the target
GCOB_FP128 value_b = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
value_b -= value_a;
// At this point, we assign the difference to *C.
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
value_b,
*rounded++);
}
else
{
// We have a fixed-point intermediate, and we are accumulating intoi a
// fixed point target.
int256 value_a = phase1_result;
int rdigits_a = phase1_rdigits;
int256 value_b = {};
int rdigits_b;
get_int256_from_qualified_field(value_b, rdigits_b, C[0], C_o[0], C_s[0]);
// We have to scale the one with fewer rdigits to match the one with greater
// rdigits:
if( rdigits_a > rdigits_b )
{
scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
rdigits_b = rdigits_a;
}
else if( rdigits_a < rdigits_b )
{
scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
rdigits_a = rdigits_b;
}
// The two numbers have the same number of rdigits. It's now safe to add
// them.
subtract_int256_from_int256(value_b, value_a);
int overflow = squeeze_int256(value_b, rdigits_b);
if( overflow )
{
*compute_error |= compute_error_overflow;
}
// At this point, we assign running_sum to *C.
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
value_b.i128[0],
rdigits_b,
*rounded++);
}
}
extern "C"
void
__gg__subtractf2_fixed_phase1(cbl_arith_format_t ,
size_t nA,
size_t ,
size_t ,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
// This is the calculation phase of a fixed-point SUBTRACT Format 2
cblc_field_t **B = __gg__treeplet_2f;
size_t *B_o = __gg__treeplet_2o;
size_t *B_s = __gg__treeplet_2s;
// Add up all the A values
__gg__add_fixed_phase1( not_expected_e ,
nA,
0,
0,
rounded,
on_error_flag,
compute_error);
// Subtract that from the B value:
int256 value_a = phase1_result;
int rdigits_a = phase1_rdigits;
int256 value_b = {};
int rdigits_b;
get_int256_from_qualified_field(value_b, rdigits_b, B[0], B_o[0], B_s[0]);
// We have to scale the one with fewer rdigits to match the one with greater
// rdigits:
if( rdigits_a > rdigits_b )
{
scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
rdigits_b = rdigits_a;
}
else if( rdigits_a < rdigits_b )
{
scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
rdigits_a = rdigits_b;
}
// The two numbers have the same number of rdigits. It's now safe to add
// them.
subtract_int256_from_int256(value_b, value_a);
int overflow = squeeze_int256(value_b, rdigits_b);
if( overflow )
{
*compute_error |= compute_error_overflow;
}
phase1_result = value_b;
phase1_rdigits = rdigits_b;
}
extern "C"
void
__gg__subtractf1_float_phase2(cbl_arith_format_t ,
size_t ,
size_t ,
size_t ,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
// This is the assignment phase of an ADD Format 2
// We take phase1_result and subtract it from C
GCOB_FP128 temp = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
temp = subtraction_helper_float(temp, phase1_result_float, compute_error);
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
temp,
*rounded++);
}
extern "C"
void
__gg__subtractf2_float_phase1(cbl_arith_format_t ,
size_t nA,
size_t ,
size_t ,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
// This is the calculation phase of a fixed-point SUBTRACT Format 2
cblc_field_t **B = __gg__treeplet_2f;
size_t *B_o = __gg__treeplet_2o;
size_t *B_s = __gg__treeplet_2s;
// Add up all the A values
__gg__add_float_phase1( not_expected_e ,
nA,
0,
0,
rounded,
on_error_flag,
compute_error
);
// Subtract that from the B value:
GCOB_FP128 value_b = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
// The two numbers have the same number of rdigits. It's now safe to add
// them.
phase1_result_float = subtraction_helper_float(value_b, phase1_result_float, compute_error);
}
extern "C"
void
__gg__subtractf3( cbl_arith_format_t ,
size_t nA,
size_t ,
size_t ,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
// This is an ADD Format 3. Each A[i] gets accumulated into each C[i]. Each
// SUBTRACTION is treated separately.
cblc_field_t **A = __gg__treeplet_1f;
size_t *A_o = __gg__treeplet_1o;
size_t *A_s = __gg__treeplet_1s;
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
for(size_t i=0; i<nA; i++)
{
if( A[i]->type == FldFloat || C[i]->type == FldFloat)
{
GCOB_FP128 value_a = __gg__float128_from_qualified_field(A[i], A_o[i], A_s[i]);
GCOB_FP128 value_b = __gg__float128_from_qualified_field(C[i], C_o[i], C_s[i]);
value_b = subtraction_helper_float(value_b, value_a, compute_error);
// At this point, we assign the sum to *C.
*compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
on_size_error,
value_b,
*rounded++);
}
else
{
// We are doing fixed-point subtraction.
int256 value_a;
int rdigits_a;
int256 value_b;
int rdigits_b;
get_int256_from_qualified_field(value_a, rdigits_a, A[i], A_o[i], A_s[i]);
get_int256_from_qualified_field(value_b, rdigits_b, C[i], C_o[i], C_s[i]);
// We have to scale the one with fewer rdigits to match the one with greater
// rdigits:
if( rdigits_a > rdigits_b )
{
scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
rdigits_b = rdigits_a;
}
else if( rdigits_a < rdigits_b )
{
scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
rdigits_a = rdigits_b;
}
// The two numbers have the same number of rdigits. It's now safe to add
// them.
subtract_int256_from_int256(value_b, value_a);
int overflow = squeeze_int256(value_b, rdigits_b);
if( overflow )
{
*compute_error |= compute_error_overflow;
}
// At this point, we assign the sum to *C.
*compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
on_size_error,
value_b.i128[0],
rdigits_b,
*rounded++);
}
}
}
static bool multiply_intermediate_is_float;
static GCOB_FP128 multiply_intermediate_float;
static __int128 multiply_intermediate_int128;
static int multiply_intermediate_rdigits;
extern "C"
void
__gg__multiplyf1_phase1(cbl_arith_format_t ,
size_t ,
size_t ,
size_t ,
cbl_round_t *,
int ,
int *)
{
// We are getting just the one value, which we are converting to the necessary
// intermediate form
cblc_field_t **A = __gg__treeplet_1f;
size_t *A_o = __gg__treeplet_1o;
size_t *A_s = __gg__treeplet_1s;
if( A[0]->type == FldFloat )
{
multiply_intermediate_is_float = true;
multiply_intermediate_float = __gg__float128_from_qualified_field(A[0],
A_o[0],
A_s[0]);
}
else
{
multiply_intermediate_is_float = false;
multiply_intermediate_int128 =
__gg__binary_value_from_qualified_field(&multiply_intermediate_rdigits,
A[0],
A_o[0],
A_s[0]);
}
}
static
void multiply_int128_by_int128(int256 &ABCD,
__int128 ab_value,
__int128 cd_value)
{
int is_negative = ( ((uint8_t *)(&ab_value))[15]^((uint8_t *)(&cd_value))[15]) & 0x80;
if( ab_value < 0 )
{
ab_value = -ab_value;
}
if( cd_value < 0 )
{
cd_value = -cd_value;
}
uint128 AC00;
uint128 AD0;
uint128 BC0;
uint128 BD;
// Let's extract the digits.
uint64_t a = *(uint64_t *)((unsigned char *)(&ab_value)+8);
uint64_t b = *(uint64_t *)((unsigned char *)(&ab_value)+0);
uint64_t c = *(uint64_t *)((unsigned char *)(&cd_value)+8);
uint64_t d = *(uint64_t *)((unsigned char *)(&cd_value)+0);
// multiply (a0 + b) * (c0 + d)
AC00 = (uint128)a * c;
AD0 = (uint128)a * d;
BC0 = (uint128)b * c;
BD = (uint128)b * d;
// ABCD is the sum of those four pieces
int256 temp;
ABCD.i128[1] = 0;
ABCD.i128[0] = BD;
temp.i64[3] = 0;
memcpy(&temp.i64[1], &BC0, 16);
temp.i64[0] = 0;
add_int256_to_int256(ABCD, temp);
memcpy(&temp.i64[1], &AD0, 16);
add_int256_to_int256(ABCD, temp);
temp.i64[1] = 0;
memcpy(&temp.i64[2], &AC00, 16);
add_int256_to_int256(ABCD, temp);
// ABCD is now a 256-bit integer with rdigits decimal places
if(is_negative)
{
negate_int256(ABCD);
}
}
extern "C"
void
__gg__multiplyf1_phase2(cbl_arith_format_t ,
size_t ,
size_t ,
size_t ,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
int error_this_time=0;
GCOB_FP128 a_value;
GCOB_FP128 b_value;
if( multiply_intermediate_is_float )
{
a_value = multiply_intermediate_float;
if( C[0]->type == FldFloat )
{
b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
goto float_float;
}
else
{
// float times fixed
b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
goto float_float;
}
}
else
{
if( C[0]->type == FldFloat )
{
// gixed * float
a_value = (GCOB_FP128) multiply_intermediate_int128;
if( multiply_intermediate_rdigits )
{
a_value /= (GCOB_FP128)__gg__power_of_ten(multiply_intermediate_rdigits);
}
b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
goto float_float;
}
else
{
// fixed times fixed
// We have two 128-bit numbers. Call them AB and CD, where A, B, C, D are
// 64-bit "digits". We need to multiply them to create a 256-bit result
int cd_rdigits;
__int128 ab_value = multiply_intermediate_int128;
__int128 cd_value = __gg__binary_value_from_qualified_field(&cd_rdigits, C[0], C_o[0], C_s[0]);
int256 ABCD;
int rdigits = multiply_intermediate_rdigits + cd_rdigits;
multiply_int128_by_int128(ABCD, ab_value, cd_value);
int overflow = squeeze_int256(ABCD, rdigits);
if( overflow )
{
*compute_error |= compute_error_overflow;
}
// At this point, we assign running_sum to *C.
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
ABCD.i128[0],
rdigits,
*rounded++);
goto done;
}
}
float_float:
a_value = multiply_helper_float(a_value, b_value, &error_this_time);
if( error_this_time && on_size_error)
{
*compute_error |= error_this_time;
rounded++;
}
else
{
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
a_value,
*rounded++);
}
done:
return;
}
extern "C"
void
__gg__multiplyf2( cbl_arith_format_t ,
size_t ,
size_t ,
size_t nC,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
cblc_field_t **A = __gg__treeplet_1f;
size_t *A_o = __gg__treeplet_1o;
size_t *A_s = __gg__treeplet_1s;
cblc_field_t **B = __gg__treeplet_2f;
size_t *B_o = __gg__treeplet_2o;
size_t *B_s = __gg__treeplet_2s;
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
bool got_float = false;
GCOB_FP128 product_float;
int256 product_fix;
int product_fix_digits;
if( A[0]->type == FldFloat || B[0]->type == FldFloat )
{
GCOB_FP128 a_value = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
GCOB_FP128 b_value = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
product_float = multiply_helper_float(a_value, b_value, compute_error);
got_float = true;
}
else
{
int a_rdigits;
int b_rdigits;
__int128 a_value = __gg__binary_value_from_qualified_field(&a_rdigits, A[0], A_o[0], A_s[0]);
__int128 b_value = __gg__binary_value_from_qualified_field(&b_rdigits, B[0], B_o[0], B_s[0]);
product_fix_digits = a_rdigits + b_rdigits;
multiply_int128_by_int128(product_fix, a_value, b_value);
int overflow = squeeze_int256(product_fix, product_fix_digits);
if( overflow )
{
*compute_error |= compute_error_overflow;
}
}
for(size_t i=0; i<nC; i++)
{
if( got_float )
{
*compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
on_size_error,
product_float,
*rounded++);
}
else
{
*compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
on_size_error,
product_fix.i128[0],
product_fix_digits,
*rounded++);
}
}
}
static void
shift_in_place128(uint8_t *buf, int size, int bits)
{
// Assume that size in bytes is some multiple of sixteen. That is, the
// buffer we are shifting is made up of either two 128-bit bit values (for
// an int256) or three 128-bit values (an int384)
// "bits" is a value from zero to 127
if( !bits )
{
return;
}
size_t places = size/16; // This is either two or three
uint128 temp;
uint128 overflow = 0;
uint128 *as128 = (uint128 *)buf;
for( size_t i=0; i<places; i++ )
{
temp = as128[i] >> (128 - bits);
as128[i] <<= bits;
as128[i] += overflow;
overflow = temp;
}
}
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"
static char *
int256_as_decimal(int256 val)
{
char ach[120];
memset(ach, 0, sizeof(ach));
strcpy(ach, "0");
int index = 0;
while(val.i128[0] || val.i128[1])
{
int256 before;
int256 after;
before = val;
after = val;
divide_int256_by_int64(after, 10);
multiply_int256_by_int64(after, 10);
uint64_t digit = before.i64[0] - after.i64[0];
ach[index++] = digit + '0';
divide_int256_by_int64(val, 10);
}
if( !index )
{
index = 1;
}
index -= 1;
int r = 0;
static char retval[120];
while(index >= 0)
{
retval[r++] = ach[index--];
}
retval[r++] = '\0';
return retval;
}
#pragma GCC diagnostic pop
static void
divide_int128_by_int128(int256 &quotient,
int &quotient_rdigits,
__int128 dividend,
int dividend_rdigits,
__int128 divisor,
int divisor_rdigits,
int *compute_error)
{
if( divisor == 0 )
{
*compute_error |= compute_error_divide_by_zero;
memset(&quotient, 0, sizeof(quotient));
memcpy(&quotient, &dividend, 8);
quotient_rdigits = dividend_rdigits;
return;
}
bool is_negative = false;
if(dividend < 0)
{
is_negative = !is_negative;
dividend = -dividend;
}
if(divisor < 0)
{
is_negative = !is_negative;
divisor = -divisor;
}
// We are going to be referencing the 64-bit pices of the 128-bit divisor:
uint64_t *divisor64 = (uint64_t *)&divisor;
quotient.i128[1] = 0;
quotient.i128[0] = dividend;
// In order to get 0.3333333.... from 1 / 3, we are going to scale up the
// numerator so that it has 37 rdigits:
int scale = MAX_FIXED_POINT_DIGITS;
scale_int256_by_digits(quotient, scale);
quotient_rdigits = scale + dividend_rdigits - divisor_rdigits;
// Now, let's see if we can do a simple divide-by-single-place calculation:
if( divisor64[1] == 0 )
{
// Yes! The divisor fits into 64 bits:
divide_int256_by_int64(quotient, (uint64_t)divisor);
}
else
{
// We have to do long division, and that means Knuth's Algorithm D. Let's
// review where we are.
//
// We are using 64-bit places to divide one 128-bit number by another. We
// know that both are positive. So, we are dividing
//
// AB by CD, where we know C is non-zero.
//
// Because we want fractional digits to the right, we multiplied by 10**37,
// which is smaller than 2**64, so we have one additional place:
//
// ABx by CD
//
// Algorithm D requires that we left-shift ABX and CD by enough bits to make
// turn on high-order by of CD. This will be a value between 1 and 63
// shifts, resulting in
//
// ABxy by CD.
//
// We can know that the quotient will have at most two places. We can see
// this in the decimal analogy. The worst case scenario is dividing
//
// 99 by 10
//
// We multiply the top by 10 to give us one fractional decimal place in the
// result:
//
// 990 by 10
//
// To satisfy Algorithm D's requirement that C be >= b/2, we multiply both
// dividend and divisor by 5:
//
// 4950 by 50
//
// And then we do the work that gives us the two-place answer of 99.
//
//
// Here is our four-place 256-bit "numerator"
int256 numerator;
// Copy the three-place ABx value into the numerator area
memcpy(&numerator, &quotient, sizeof(quotient));
// Calculate how many bits we need to shift CD to make the high-order bit
// turn on:
int bits_to_shift = 0;
int i=15;
while( ((uint8_t *)(&divisor))[i] == 0 )
{
i -= 1;
bits_to_shift += 8;
}
uint8_t tail = ((uint8_t *)(&divisor))[i];
while( !(tail & 0x80) )
{
bits_to_shift += 1;
tail <<= 1;
}
// Shift both the numerator and the divisor that number of bits
shift_in_place128((uint8_t *)&numerator, sizeof(numerator), bits_to_shift);
shift_in_place128((uint8_t *)&divisor, sizeof(divisor), bits_to_shift);
// We are now ready to do the guess-multiply-subtract loop. We know that
// the result will have two places, so we know we are going to go through
// that loop two times. We will build the quotient from the high-order
// place down:
// Let's prime the pump by loading remnant with the A value of ABxyz
int q_place = 1;
memset(&quotient, 0, sizeof(quotient));
while( q_place >= 0 )
{
// We develop our guess for a quotient by dividing the top two places of
// the numerator area by C
uint128 temp;
uint64_t *temp64 = (uint64_t *)&temp;
temp64[1] = numerator.i64[q_place+2];
temp64[0] = numerator.i64[q_place+1];
quotient.i64[q_place] = temp / divisor64[1];
// Now we multiply our 64-bit guess by the 128-bit CD to get the
// three-place value we are going to subtract from the numerator area.
uint64_t subber[3];
subber[2] = 0;
// Start with the bottom 128 bits of the "subber"
*(uint128 *)subber = (uint128) divisor64[0] * quotient.i64[q_place];
// Get the next 128 bits of subber
temp = (uint128) divisor64[1] * quotient.i64[q_place];
// Add the top of the first product to the bottom of the second:
subber[1] += temp64[0];
// See if there was an overflow:
if( subber[1] < temp64[0] )
{
// There was an overflow
subber[2] = 1;
}
// And now put the top of the second product into place:
subber[2] += temp64[1];
// "subber" is now the three-place product of the two-place divisor times
// the one-place guess
// We now subtract the three-place subber from the appropriate place of
// the numerator:
uint64_t borrow = 0;
for(size_t i=0; i<3; i++)
{
if( numerator.i64[q_place + i] == 0 && borrow )
{
// We are subtracting from zero and we have a borrow. Leave the
// borrow on and just do the subtraction:
numerator.i64[q_place + i] -= subber[i];
}
else
{
uint64_t stash = numerator.i64[q_place + i];
numerator.i64[q_place + i] -= borrow;
numerator.i64[q_place + i] -= subber[i];
if( numerator.i64[q_place + i] > stash )
{
// After subtracting, the value got bigger, which means we have
// to borrow from the next value to the left
borrow = 1;
}
else
{
borrow = 0;
}
}
}
// The three-place subber has been removed from the numerator.
// Now Algorithm D comes back into play. Knuth has proved that the guess
// is usually correct, but sometimes can be one too large, which means
// that the numerator area goes negative. On rare occasions, the guess can
// be two too large. So, we have to make sure that the numerator area is
// actually positive by adding subber back in.
while( numerator.i64[q_place+2] & 0x8000000000000000UL )
{
// We need to add subber back into the numerator area
uint64_t carry = 0;
for(size_t i=0; i<3; i++)
{
if( numerator.i64[q_place + i] == 0xFFFFFFFFFFFFFFFFUL && carry )
{
// We are at the top and have a carry. Just leave the carry on
// and do the addition:
numerator.i64[q_place + i] += subber[i];
}
else
{
// We are not at the top.
uint64_t stash = numerator.i64[q_place + i];
numerator.i64[q_place + i] += carry;
numerator.i64[q_place + i] += subber[i];
if( numerator.i64[q_place + i] < stash )
{
// The addition caused the result to get smaller, meaning that
// we wrapped around:
carry = 1;
}
else
{
carry = 0;
}
}
}
}
q_place -= 1;
}
}
if( is_negative )
{
negate_int256(quotient);
}
}
extern "C"
void
__gg__dividef1_phase2(cbl_arith_format_t ,
size_t ,
size_t ,
size_t ,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
int error_this_time=0;
GCOB_FP128 a_value;
GCOB_FP128 b_value;
if( multiply_intermediate_is_float )
{
a_value = multiply_intermediate_float;
if( C[0]->type == FldFloat )
{
b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
goto float_float;
}
else
{
// float times fixed
b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
goto float_float;
}
}
else
{
if( C[0]->type == FldFloat )
{
// gixed * float
a_value = (GCOB_FP128) multiply_intermediate_int128;
if( multiply_intermediate_rdigits )
{
a_value /= (GCOB_FP128)__gg__power_of_ten(multiply_intermediate_rdigits);
}
b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
goto float_float;
}
else
{
// fixed times fixed
// We have two 128-bit numbers. Call them AB and CD, where A, B, C, D are
// 64-bit "digits". We need to multiply them to create a 256-bit result
int dividend_rdigits;
__int128 dividend = __gg__binary_value_from_qualified_field(&dividend_rdigits, C[0], C_o[0], C_s[0]);
int quotient_rdigits;
int256 quotient;
divide_int128_by_int128(quotient,
quotient_rdigits,
dividend,
dividend_rdigits,
multiply_intermediate_int128,
multiply_intermediate_rdigits,
compute_error);
int overflow = squeeze_int256(quotient, quotient_rdigits);
if( overflow )
{
*compute_error |= compute_error_overflow;
}
// At this point, we assign the quotient to *C.
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
quotient.i128[0],
quotient_rdigits,
*rounded++);
goto done;
}
}
float_float:
b_value = divide_helper_float(b_value, a_value, &error_this_time);
*compute_error |= error_this_time;
if( error_this_time && on_size_error)
{
rounded++;
}
else
{
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
b_value,
*rounded++);
}
done:
return;
}
extern "C"
void
__gg__dividef23(cbl_arith_format_t ,
size_t ,
size_t ,
size_t nC,
cbl_round_t *rounded,
int on_error_flag,
int *compute_error
)
{
cblc_field_t **A = __gg__treeplet_1f;
size_t *A_o = __gg__treeplet_1o;
size_t *A_s = __gg__treeplet_1s;
cblc_field_t **B = __gg__treeplet_2f;
size_t *B_o = __gg__treeplet_2o;
size_t *B_s = __gg__treeplet_2s;
cblc_field_t **C = __gg__treeplet_3f;
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
int error_this_time=0;
if( A[0]->type == FldFloat || B[0]->type == FldFloat )
{
GCOB_FP128 a_value;
GCOB_FP128 b_value;
GCOB_FP128 c_value;
a_value = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
b_value = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
c_value = divide_helper_float(a_value, b_value, &error_this_time);
*compute_error |= error_this_time;
if( !error_this_time )
{
for(size_t i=0; i<nC; i++)
{
*compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
on_size_error,
c_value,
*rounded++);
}
}
}
else
{
// fixed divided by fixed
int dividend_rdigits;
__int128 dividend = __gg__binary_value_from_qualified_field(&dividend_rdigits, A[0], A_o[0], A_s[0]);
int divisor_rdigits;
__int128 divisor = __gg__binary_value_from_qualified_field(&divisor_rdigits, B[0], B_o[0], B_s[0]);
int quotient_rdigits;
int256 quotient;
divide_int128_by_int128(quotient,
quotient_rdigits,
dividend,
dividend_rdigits,
divisor,
divisor_rdigits,
compute_error);
*compute_error |= squeeze_int256(quotient, quotient_rdigits);
if( !*compute_error )
{
// At this point, we assign the quotient to *C.
for(size_t i=0; i<nC; i++)
{
*compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
on_size_error,
quotient.i128[0],
quotient_rdigits,
*rounded++);
}
}
}
}
extern "C"
void
__gg__dividef45(cbl_arith_format_t ,
size_t ,
size_t ,
size_t ,
cbl_round_t *rounded_p,
int on_error_flag,
int *compute_error
)
{
cblc_field_t **A = __gg__treeplet_1f; // Numerator
size_t *A_o = __gg__treeplet_1o;
size_t *A_s = __gg__treeplet_1s;
cblc_field_t **B = __gg__treeplet_2f; // Denominator
size_t *B_o = __gg__treeplet_2o;
size_t *B_s = __gg__treeplet_2s;
cblc_field_t **C = __gg__treeplet_3f; // Has remainder, then quotient
size_t *C_o = __gg__treeplet_3o;
size_t *C_s = __gg__treeplet_3s;
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
int error_this_time=0;
if( A[0]->type == FldFloat || B[0]->type == FldFloat )
{
GCOB_FP128 a_value;
GCOB_FP128 b_value;
GCOB_FP128 c_value;
a_value = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
b_value = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
c_value = divide_helper_float(a_value, b_value, &error_this_time);
*compute_error |= error_this_time;
if( !error_this_time )
{
*compute_error |= conditional_stash(C[1], C_o[1], C_s[1],
on_size_error,
c_value,
*rounded_p++);
// This is floating point, and there is a remainder, and we don't know
// what that means. Set the remainder to zero
if( !*compute_error )
{
c_value = 0;
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
c_value,
*rounded_p++);
}
}
}
else
{
// fixed divided by fixed
int dividend_rdigits;
__int128 dividend = __gg__binary_value_from_qualified_field(&dividend_rdigits, A[0], A_o[0], A_s[0]);
int divisor_rdigits;
__int128 divisor = __gg__binary_value_from_qualified_field(&divisor_rdigits, B[0], B_o[0], B_s[0]);
int quotient_rdigits;
int256 quotient;
divide_int128_by_int128(quotient,
quotient_rdigits,
dividend,
dividend_rdigits,
divisor,
divisor_rdigits,
compute_error);
*compute_error |= squeeze_int256(quotient, quotient_rdigits);
if( !*compute_error )
{
// We are going to need the unrounded quotient to calculate the remainder
__int128 unrounded_quotient = 0;
int unrounded_quotient_digits;
rounded_p += 1;// Skip the rounded value for the remainder
cbl_round_t rounded = *rounded_p;
switch(rounded)
{
case truncation_e:
{
*compute_error |= conditional_stash(C[1], C_o[1], C_s[1],
on_size_error,
quotient.i128[0],
quotient_rdigits,
*rounded_p++);
unrounded_quotient = __gg__binary_value_from_qualified_field(
&unrounded_quotient_digits,
C[1], C_o[1], C_s[1]);
break;
}
default:
{
conditional_stash(C[1], C_o[1], C_s[1],
false,
quotient.i128[0],
quotient_rdigits,
truncation_e);
unrounded_quotient = __gg__binary_value_from_qualified_field(
&unrounded_quotient_digits,
C[1], C_o[1], C_s[1]);
// At this point, we assign the rounded quotient to *C.
*compute_error |= conditional_stash(C[1], C_o[1], C_s[1],
on_size_error,
quotient.i128[0],
quotient_rdigits,
*rounded_p++);
break;
}
}
if( !*compute_error )
{
// We need to calculate the remainder
// Remainders in COBOL are seriously weird. The NIST suite
// has an example where 174 is divided by 16. The quotient
// is a 999.9, and the remainder is a 9999
// So, here goes: 174 by 16 is 10.875. The unrounded
// assignment to Q is thus 10.8
// You then multiply 10.8 by 16, giving 172.8
// That gets subtracted from 174, giving 1.2
// That gets assigned to the 9999 remainder, which is
// thus 1
// Any mathematician would walk away, slowly, shaking their head.
// We need to multiply the unrounded quotient by the divisor.
int256 temp;
int temp_rdigits;
// Step 1: Multiply the unrounded quotient by the divisor
multiply_int128_by_int128(temp, unrounded_quotient, divisor);
temp_rdigits = unrounded_quotient_digits + divisor_rdigits;
int256 odividend = {};
memcpy(&odividend, &dividend, sizeof(dividend));
// We need to line up the rdigits so that we can subtract temp from
// odividend:
if( temp_rdigits < dividend_rdigits )
{
scale_int256_by_digits(temp, dividend_rdigits-temp_rdigits);
temp_rdigits = dividend_rdigits;
}
else if(temp_rdigits > dividend_rdigits)
{
scale_int256_by_digits(odividend, temp_rdigits-dividend_rdigits);
}
subtract_int256_from_int256(odividend, temp);
*compute_error |= squeeze_int256(odividend, temp_rdigits);
if( !*compute_error )
{
*compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
on_size_error,
odividend.i128[0],
temp_rdigits,
truncation_e);
}
}
}
}
}