And wouldn't an if-less vec_transform be faster?
Or is a failed branch prediction not a problem on older CPUs, while a multiply is much slower?
Code Select
/*
 * Transform the vector *src by the 3x3 matrix *mat and store the result in
 * *dst.
 *
 * Each output component is the dot product of one matrix row (m11..m13,
 * m21..m23, m31..m33) with the source's (x, y, z), where every matrix entry
 * is cast to int32_t before multiplying; the sum is then shifted right by
 * SCALE_BITS. The computation is branch-free.
 *
 * src and dst may point to the same VECTOR: the source is copied before any
 * component of *dst is written, so an in-place transform yields the same
 * result as a transform into a separate destination.
 *
 * NOTE(review): SCALE_BITS is defined outside this block; presumably it is
 * the number of fractional bits of the fixed-point matrix entries - confirm.
 * NOTE(review): if the sum is a negative signed value, the result of >> is
 * implementation-defined in C; this code relies on an arithmetic shift.
 * NOTE(review): no overflow check is done on the products or their sum;
 * callers must keep the operands small enough - verify against the field
 * types of VECTOR and MATRIX.
 */
void vec_transform(const VECTOR* src, const MATRIX* mat, VECTOR* dst)
{
    /* Snapshot the source first so that writing dst->x cannot change the
     * inputs of the y and z rows when dst == src. */
    const VECTOR in = *src;

    dst->x = ((int32_t)mat->m11 * in.x + (int32_t)mat->m12 * in.y + (int32_t)mat->m13 * in.z) >> SCALE_BITS;
    dst->y = ((int32_t)mat->m21 * in.x + (int32_t)mat->m22 * in.y + (int32_t)mat->m23 * in.z) >> SCALE_BITS;
    dst->z = ((int32_t)mat->m31 * in.x + (int32_t)mat->m32 * in.y + (int32_t)mat->m33 * in.z) >> SCALE_BITS;
}

