mirror of
https://github.com/postgres/postgres.git
synced 2025-06-12 00:01:43 -04:00
Frob numeric.c loop so that clang will auto-vectorize it too.
Experimentation shows that clang will auto-vectorize the critical multiplication loop if the termination condition is written "i2 < limit" rather than "i2 <= limit". This seems unbelievably stupid, but I've reproduced it on both clang 9.0.1 (RHEL8) and 11.0.3 (macOS Catalina). gcc vectorizes the loop with either form, so tweak the code to use the "<" form that clang needs.
Discussion: https://postgr.es/m/CAJ3gD9evtA_vBo+WMYMyT-u=keHX7-r8p2w7OSRfXf42LTwCZQ@mail.gmail.com
This commit is contained in:
parent
87e6ed7c8c
commit
9c79e646c6
@ -8191,7 +8191,6 @@ mul_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
|
|||||||
int res_weight;
|
int res_weight;
|
||||||
int maxdigits;
|
int maxdigits;
|
||||||
int *dig;
|
int *dig;
|
||||||
int *dig_i1_2;
|
|
||||||
int carry;
|
int carry;
|
||||||
int maxdig;
|
int maxdig;
|
||||||
int newdig;
|
int newdig;
|
||||||
@ -8327,7 +8326,7 @@ mul_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
|
|||||||
* Add the appropriate multiple of var2 into the accumulator.
|
* Add the appropriate multiple of var2 into the accumulator.
|
||||||
*
|
*
|
||||||
* As above, digits of var2 can be ignored if they don't contribute,
|
* As above, digits of var2 can be ignored if they don't contribute,
|
||||||
* so we only include digits for which i1+i2+2 <= res_ndigits - 1.
|
* so we only include digits for which i1+i2+2 < res_ndigits.
|
||||||
*
|
*
|
||||||
* This inner loop is the performance bottleneck for multiplication,
|
* This inner loop is the performance bottleneck for multiplication,
|
||||||
* so we want to keep it simple enough so that it can be
|
* so we want to keep it simple enough so that it can be
|
||||||
@ -8336,11 +8335,14 @@ mul_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
|
|||||||
* Since we aren't propagating carries in this loop, the order does
|
* Since we aren't propagating carries in this loop, the order does
|
||||||
* not matter.
|
* not matter.
|
||||||
*/
|
*/
|
||||||
i = Min(var2ndigits - 1, res_ndigits - i1 - 3);
|
{
|
||||||
dig_i1_2 = &dig[i1 + 2];
|
int i2limit = Min(var2ndigits, res_ndigits - i1 - 2);
|
||||||
for (i2 = 0; i2 <= i; i2++)
|
int *dig_i1_2 = &dig[i1 + 2];
|
||||||
|
|
||||||
|
for (i2 = 0; i2 < i2limit; i2++)
|
||||||
dig_i1_2[i2] += var1digit * var2digits[i2];
|
dig_i1_2[i2] += var1digit * var2digits[i2];
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now we do a final carry propagation pass to normalize the result, which
|
* Now we do a final carry propagation pass to normalize the result, which
|
||||||
|
Loading…
x
Reference in New Issue
Block a user