Skip to content

Commit fabc2d2

Browse files
committed
Implement Newton-Raphson division
Improve performance of huge divisions
1 parent e9c711a commit fabc2d2

File tree

5 files changed

+340
-39
lines changed

5 files changed

+340
-39
lines changed

bigdecimal.gemspec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ Gem::Specification.new do |s|
4343
ext/bigdecimal/bigdecimal.c
4444
ext/bigdecimal/bigdecimal.h
4545
ext/bigdecimal/bits.h
46+
ext/bigdecimal/div.h
4647
ext/bigdecimal/feature.h
4748
ext/bigdecimal/missing.c
4849
ext/bigdecimal/missing.h

ext/bigdecimal/bigdecimal.c

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,14 @@
2929
#endif
3030

3131
#include "bits.h"
32+
#include "div.h"
3233
#include "static_assert.h"
3334

3435
#if SIZEOF_DECDIG == 4
3536
#define USE_NTT_MULTIPLICATION 1
3637
#include "ntt.h"
3738
#define NTT_MULTIPLICATION_THRESHOLD 100
39+
#define NEWTON_RAPHSON_DIVISION_THRESHOLD 200
3840
#endif
3941

4042
#define BIGDECIMAL_VERSION "3.2.2"
@@ -81,11 +83,6 @@ static struct {
8183
uint8_t mode;
8284
} rbd_rounding_modes[RBD_NUM_ROUNDING_MODES];
8385

84-
typedef struct {
85-
VALUE bigdecimal;
86-
Real *real;
87-
} BDVALUE;
88-
8986
typedef struct {
9087
VALUE bigdecimal_or_nil;
9188
Real *real_or_null;
@@ -1131,9 +1128,6 @@ BigDecimal_check_num(Real *p)
11311128
VpCheckException(p, true);
11321129
}
11331130

1134-
static VALUE BigDecimal_fix(VALUE self);
1135-
static VALUE BigDecimal_split(VALUE self);
1136-
11371131
/* Returns the value as an Integer.
11381132
*
11391133
* If the BigDecimal is infinity or NaN, raises FloatDomainError.
@@ -3233,19 +3227,38 @@ BigDecimal_literal(const char *str)
32333227

32343228
#ifdef BIGDECIMAL_USE_VP_TEST_METHODS
32353229
VALUE
3236-
BigDecimal_vpdivd(VALUE self, VALUE r, VALUE cprec) {
3237-
BDVALUE a,b,c,d;
3230+
BigDecimal_vpdivd_generic(VALUE self, VALUE r, VALUE cprec, void (*vpdivd_func)(Real*, Real*, Real*, Real*)) {
3231+
BDVALUE a, b, c, d;
32383232
size_t cn = NUM2INT(cprec);
32393233
a = GetBDValueMust(self);
32403234
b = GetBDValueMust(r);
32413235
c = NewZeroWrap(1, cn * BASE_FIG);
32423236
d = NewZeroWrap(1, VPDIVD_REM_PREC(a.real, b.real, c.real) * BASE_FIG);
3243-
VpDivd(c.real, d.real, a.real, b.real);
3237+
vpdivd_func(c.real, d.real, a.real, b.real);
32443238
RB_GC_GUARD(a.bigdecimal);
32453239
RB_GC_GUARD(b.bigdecimal);
32463240
return rb_assoc_new(c.bigdecimal, d.bigdecimal);
32473241
}
32483242

3243+
void
3244+
VpDivdNormal(Real *c, Real *r, Real *a, Real *b) {
3245+
VpDivd(c, r, a, b);
3246+
}
3247+
VALUE
3248+
BigDecimal_vpdivd(VALUE self, VALUE r, VALUE cprec) {
3249+
return BigDecimal_vpdivd_generic(self, r, cprec, VpDivdNormal);
3250+
}
3251+
3252+
VALUE
3253+
BigDecimal_vpdivd_newton(VALUE self, VALUE r, VALUE cprec) {
3254+
return BigDecimal_vpdivd_generic(self, r, cprec, VpDivdNewton);
3255+
}
3256+
3257+
VALUE
3258+
BigDecimal_newton_raphson_inverse(VALUE self, VALUE prec) {
3259+
return newton_raphson_inverse(self, NUM2SIZET(prec));
3260+
}
3261+
32493262
VALUE
32503263
BigDecimal_vpmult(VALUE self, VALUE v) {
32513264
BDVALUE a,b,c;
@@ -3647,6 +3660,8 @@ Init_bigdecimal(void)
36473660

36483661
#ifdef BIGDECIMAL_USE_VP_TEST_METHODS
36493662
rb_define_method(rb_cBigDecimal, "vpdivd", BigDecimal_vpdivd, 2);
3663+
rb_define_method(rb_cBigDecimal, "vpdivd_newton", BigDecimal_vpdivd_newton, 2);
3664+
rb_define_method(rb_cBigDecimal, "newton_raphson_inverse", BigDecimal_newton_raphson_inverse, 1);
36503665
rb_define_method(rb_cBigDecimal, "vpmult", BigDecimal_vpmult, 1);
36513666
#ifdef USE_NTT_MULTIPLICATION
36523667
rb_define_method(rb_cBigDecimal, "nttmult", BigDecimal_nttmult, 1);
@@ -3706,7 +3721,6 @@ enum op_sw {
37063721
};
37073722

37083723
static int VpIsDefOP(Real *c, Real *a, Real *b, enum op_sw sw);
3709-
static int AddExponent(Real *a, SIGNED_VALUE n);
37103724
static DECDIG VpAddAbs(Real *a,Real *b,Real *c);
37113725
static DECDIG VpSubAbs(Real *a,Real *b,Real *c);
37123726
static size_t VpSetPTR(Real *a, Real *b, Real *c, size_t *a_pos, size_t *b_pos, size_t *c_pos, DECDIG *av, DECDIG *bv);
@@ -5063,6 +5077,14 @@ VpDivd(Real *c, Real *r, Real *a, Real *b)
50635077

50645078
if (word_a > word_r || word_b + word_c - 2 >= word_r) goto space_error;
50655079

5080+
#ifdef USE_NTT_MULTIPLICATION
5081+
// Newton-Raphson division requires multiplication to be faster than O(n^2)
5082+
if (word_c >= NEWTON_RAPHSON_DIVISION_THRESHOLD && word_b >= NEWTON_RAPHSON_DIVISION_THRESHOLD) {
5083+
VpDivdNewton(c, r, a, b);
5084+
goto Exit;
5085+
}
5086+
#endif
5087+
50665088
for (i = 0; i < word_a; ++i) r->frac[i] = a->frac[i];
50675089
for (i = word_a; i < word_r; ++i) r->frac[i] = 0;
50685090
for (i = 0; i < word_c; ++i) c->frac[i] = 0;

ext/bigdecimal/bigdecimal.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,11 @@ typedef struct {
188188
DECDIG frac[FLEXIBLE_ARRAY_SIZE]; /* Array of fraction part. */
189189
} Real;
190190

191+
typedef struct {
192+
VALUE bigdecimal;
193+
Real *real;
194+
} BDVALUE;
195+
191196
/*
192197
* ------------------
193198
* EXPORTables.
@@ -235,10 +240,30 @@ VP_EXPORT int VpActiveRound(Real *y, Real *x, unsigned short f, ssize_t il);
235240
VP_EXPORT int VpMidRound(Real *y, unsigned short f, ssize_t nf);
236241
VP_EXPORT int VpLeftRound(Real *y, unsigned short f, ssize_t nf);
237242
VP_EXPORT void VpFrac(Real *y, Real *x);
243+
VP_EXPORT int AddExponent(Real *a, SIGNED_VALUE n);
238244

239245
/* VP constants */
240246
VP_EXPORT Real *VpOne(void);
241247

248+
/*
249+
* **** BigDecimal part ****
250+
*/
251+
VP_EXPORT VALUE BigDecimal_lt(VALUE self, VALUE r);
252+
VP_EXPORT VALUE BigDecimal_ge(VALUE self, VALUE r);
253+
VP_EXPORT VALUE BigDecimal_exponent(VALUE self);
254+
VP_EXPORT VALUE BigDecimal_fix(VALUE self);
255+
VP_EXPORT VALUE BigDecimal_frac(VALUE self);
256+
VP_EXPORT VALUE BigDecimal_add(VALUE self, VALUE b);
257+
VP_EXPORT VALUE BigDecimal_sub(VALUE self, VALUE b);
258+
VP_EXPORT VALUE BigDecimal_mult(VALUE self, VALUE b);
259+
VP_EXPORT VALUE BigDecimal_add2(VALUE self, VALUE b, VALUE n);
260+
VP_EXPORT VALUE BigDecimal_sub2(VALUE self, VALUE b, VALUE n);
261+
VP_EXPORT VALUE BigDecimal_mult2(VALUE self, VALUE b, VALUE n);
262+
VP_EXPORT VALUE BigDecimal_split(VALUE self);
263+
VP_EXPORT inline BDVALUE GetBDValueMust(VALUE v);
264+
VP_EXPORT inline BDVALUE rbd_allocate_struct_zero_wrap(int sign, size_t const digits);
265+
#define NewZeroWrap rbd_allocate_struct_zero_wrap
266+
242267
/*
243268
* ------------------
244269
* MACRO definitions.

ext/bigdecimal/div.h

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
static VALUE
2+
pow10(ssize_t n) {
3+
BDVALUE x = NewZeroWrap(1, BIGDECIMAL_COMPONENT_FIGURES);
4+
x.real->exponent = n / BIGDECIMAL_COMPONENT_FIGURES;
5+
int mod = n % BIGDECIMAL_COMPONENT_FIGURES;
6+
if (mod < 0) mod += BIGDECIMAL_COMPONENT_FIGURES;
7+
x.real->exponent = (n - mod) / BIGDECIMAL_COMPONENT_FIGURES + 1;
8+
VpSetSign(x.real, 1);
9+
DECDIG v = 1;
10+
for (int i = 0; i < mod; i++) v = v * 10;
11+
x.real->frac[0] = v;
12+
return x.bigdecimal;
13+
}
14+
15+
// Calculate the inverse of x using the Newton-Raphson method.
16+
static VALUE
17+
newton_raphson_inverse(VALUE x, size_t prec) {
18+
BDVALUE bdone = NewZeroWrap(1, 1);
19+
VpSetOne(bdone.real);
20+
VALUE one = bdone.bigdecimal;
21+
22+
// Initial approximation in 2 digits
23+
BDVALUE bdx = GetBDValueMust(x);
24+
BDVALUE inv0 = NewZeroWrap(1, 2 * BIGDECIMAL_COMPONENT_FIGURES);
25+
VpSetOne(inv0.real);
26+
DECDIG_DBL numerator = (DECDIG_DBL)BIGDECIMAL_BASE * 100;
27+
DECDIG_DBL denominator = (DECDIG_DBL)bdx.real->frac[0] * 100 + (DECDIG_DBL)(bdx.real->Prec >= 2 ? bdx.real->frac[1] : 0) * 100 / BIGDECIMAL_BASE;
28+
inv0.real->frac[0] = (DECDIG)(numerator / denominator);
29+
inv0.real->frac[1] = (DECDIG)((numerator % denominator) * (BIGDECIMAL_BASE / 100) / denominator * 100);
30+
inv0.real->Prec = 2;
31+
inv0.real->exponent = 1 - bdx.real->exponent;
32+
VpNmlz(inv0.real);
33+
RB_GC_GUARD(bdx.bigdecimal);
34+
VALUE inv = inv0.bigdecimal;
35+
36+
int bl = 1;
37+
size_t prev_n = 2;
38+
while (((size_t)1 << bl) < prec) bl++;
39+
40+
for (int i = bl; i >= 0; i--) {
41+
size_t n = (prec >> i) + 2;
42+
if (n > prec) n = prec;
43+
// Newton-Raphson iteration: inv_next = inv + inv * (1 - x * inv)
44+
VALUE one_minus_x_inv = BigDecimal_sub2(
45+
one,
46+
BigDecimal_mult2(BigDecimal_mult2(x, one, SIZET2NUM(n)), inv, SIZET2NUM(n)),
47+
SIZET2NUM(SIZET2NUM(prev_n))
48+
);
49+
inv = BigDecimal_add2(
50+
inv,
51+
BigDecimal_mult2(inv, one_minus_x_inv, SIZET2NUM(SIZET2NUM(prev_n))),
52+
SIZET2NUM(n)
53+
);
54+
prev_n = n;
55+
}
56+
return inv;
57+
}
58+
59+
// Calculates divmod by multiplying approximate reciprocal of y
60+
static void
61+
divmod_by_inv_mul(VALUE x, VALUE y, VALUE inv, VALUE *res_div, VALUE *res_mod) {
62+
VALUE div = BigDecimal_fix(BigDecimal_mult(x, inv));
63+
VALUE mod = BigDecimal_sub(x, BigDecimal_mult(div, y));
64+
while (RTEST(BigDecimal_lt(mod, INT2FIX(0)))) {
65+
mod = BigDecimal_add(mod, y);
66+
div = BigDecimal_sub(div, INT2FIX(1));
67+
}
68+
while (RTEST(BigDecimal_ge(mod, y))) {
69+
mod = BigDecimal_sub(mod, y);
70+
div = BigDecimal_add(div, INT2FIX(1));
71+
}
72+
*res_div = div;
73+
*res_mod = mod;
74+
}
75+
76+
static void
77+
slice_copy(DECDIG *dest, Real *src, size_t rshift, size_t length) {
78+
ssize_t start = src->exponent - rshift - length;
79+
if (start >= (ssize_t)src->Prec) return;
80+
if (start < 0) {
81+
dest -= start;
82+
length += start;
83+
start = 0;
84+
}
85+
size_t max_length = src->Prec - start;
86+
memcpy(dest, src->frac + start, Min(length, max_length) * sizeof(DECDIG));
87+
}
88+
89+
/* Calculates divmod using Newton-Raphson method.
90+
* x and y must be a BigDecimal representing an integer value.
91+
*
92+
* To calculate with low cost, we need to split x into blocks and perform divmod for each block.
93+
* x_digits = remaining_digits(<= y_digits) + block_digits * num_blocks
94+
*
95+
* Example:
96+
* xxx_xxxxx_xxxxx_xxxxx(18 digits) / yyyyy(5 digits)
97+
* remaining_digits = 3, block_digits = 5, num_blocks = 3
98+
* repeating xxxxx_xxxxxx.divmod(yyyyy) calculation 3 times.
99+
*
100+
* In each divmod step, dividend is at most (y_digits + block_digits) digits and divisor is y_digits digits.
101+
* Reciprocal of y needs block_digits + 1 precision.
102+
*/
103+
static void
104+
divmod_newton(VALUE x, VALUE y, VALUE *div_out, VALUE *mod_out) {
105+
size_t x_digits = NUM2SIZET(BigDecimal_exponent(x));
106+
size_t y_digits = NUM2SIZET(BigDecimal_exponent(y));
107+
if (x_digits <= y_digits) x_digits = y_digits + 1;
108+
109+
size_t n = x_digits / y_digits;
110+
size_t block_figs = (x_digits - y_digits) / n / BIGDECIMAL_COMPONENT_FIGURES + 1;
111+
size_t block_digits = block_figs * BIGDECIMAL_COMPONENT_FIGURES;
112+
size_t num_blocks = (x_digits - y_digits + block_digits - 1) / block_digits;
113+
size_t y_figs = (y_digits - 1) / BIGDECIMAL_COMPONENT_FIGURES + 1;
114+
VALUE yinv = newton_raphson_inverse(y, block_digits + 1);
115+
116+
BDVALUE divident = NewZeroWrap(1, BIGDECIMAL_COMPONENT_FIGURES * (y_figs + block_figs));
117+
BDVALUE div_result = NewZeroWrap(1, BIGDECIMAL_COMPONENT_FIGURES * (num_blocks * block_figs + 1));
118+
BDVALUE bdx = GetBDValueMust(x);
119+
120+
// right shift
121+
VALUE mod = BigDecimal_mult(x, pow10(-num_blocks * block_digits));
122+
123+
for (ssize_t i = num_blocks - 1; i >= 0; i--) {
124+
memset(divident.real->frac, 0, (y_figs + block_figs) * sizeof(DECDIG));
125+
126+
BDVALUE bdmod = GetBDValueMust(mod);
127+
slice_copy(divident.real->frac, bdmod.real, 0, y_figs);
128+
slice_copy(divident.real->frac + y_figs, bdx.real, i * block_figs, block_figs);
129+
RB_GC_GUARD(bdmod.bigdecimal);
130+
131+
VpSetSign(divident.real, 1);
132+
divident.real->exponent = y_figs + block_figs;
133+
divident.real->Prec = y_figs + block_figs;
134+
VpNmlz(divident.real);
135+
136+
VALUE div;
137+
divmod_by_inv_mul(divident.bigdecimal, y, yinv, &div, &mod);
138+
BDVALUE bddiv = GetBDValueMust(div);
139+
slice_copy(div_result.real->frac + (num_blocks - i - 1) * block_figs, bddiv.real, 0, block_figs + 1);
140+
RB_GC_GUARD(bddiv.bigdecimal);
141+
}
142+
VpSetSign(div_result.real, 1);
143+
div_result.real->exponent = num_blocks * block_figs + 1;
144+
div_result.real->Prec = num_blocks * block_figs + 1;
145+
VpNmlz(div_result.real);
146+
RB_GC_GUARD(bdx.bigdecimal);
147+
RB_GC_GUARD(divident.bigdecimal);
148+
RB_GC_GUARD(div_result.bigdecimal);
149+
*div_out = div_result.bigdecimal;
150+
*mod_out = mod;
151+
}
152+
153+
static VALUE
154+
VpDivdNewtonInner(VALUE args_ptr)
155+
{
156+
Real **args = (Real**)args_ptr;
157+
Real *c = args[0], *r = args[1], *a = args[2], *b = args[3];
158+
BDVALUE a2, b2, c2, r2;
159+
VALUE div, mod, a2_frac = Qnil;
160+
size_t div_prec = c->MaxPrec - 1;
161+
size_t base_prec = b->Prec;
162+
163+
a2 = NewZeroWrap(1, a->Prec * BIGDECIMAL_COMPONENT_FIGURES);
164+
b2 = NewZeroWrap(1, b->Prec * BIGDECIMAL_COMPONENT_FIGURES);
165+
VpAsgn(a2.real, a, 1);
166+
VpAsgn(b2.real, b, 1);
167+
VpSetSign(a2.real, 1);
168+
VpSetSign(b2.real, 1);
169+
a2.real->exponent = base_prec + div_prec;
170+
b2.real->exponent = base_prec;
171+
172+
if ((ssize_t)a2.real->Prec > a2.real->exponent) {
173+
a2_frac = BigDecimal_frac(a2.bigdecimal);
174+
VpMidRound(a2.real, VP_ROUND_DOWN, 0);
175+
}
176+
divmod_newton(a2.bigdecimal, b2.bigdecimal, &div, &mod);
177+
if (a2_frac != Qnil) mod = BigDecimal_add(mod, a2_frac);
178+
179+
c2 = GetBDValueMust(div);
180+
r2 = GetBDValueMust(mod);
181+
VpAsgn(c, c2.real, VpGetSign(a) * VpGetSign(b));
182+
VpAsgn(r, r2.real, VpGetSign(a));
183+
AddExponent(c, a->exponent);
184+
AddExponent(c, -b->exponent);
185+
AddExponent(c, -div_prec);
186+
AddExponent(r, a->exponent);
187+
AddExponent(r, -base_prec - div_prec);
188+
RB_GC_GUARD(a2.bigdecimal);
189+
RB_GC_GUARD(a2.bigdecimal);
190+
RB_GC_GUARD(c2.bigdecimal);
191+
RB_GC_GUARD(r2.bigdecimal);
192+
return Qnil;
193+
}
194+
195+
static VALUE
196+
ensure_restore_prec_limit(VALUE limit)
197+
{
198+
VpSetPrecLimit(NUM2SIZET(limit));
199+
return Qnil;
200+
}
201+
202+
static void
203+
VpDivdNewton(Real *c, Real *r, Real *a, Real *b)
204+
{
205+
Real *args[4] = {c, r, a, b};
206+
size_t pl = VpGetPrecLimit();
207+
VpSetPrecLimit(0);
208+
// Ensure restoring prec limit because some methods used in VpDivdNewtonInner may raise an exception
209+
rb_ensure(VpDivdNewtonInner, (VALUE)args, ensure_restore_prec_limit, SIZET2NUM(pl));
210+
}

0 commit comments

Comments
 (0)