News:

Herr Otto Partz says you're all nothing but pipsqueaks!

Main Menu
Menu

Show posts

This section allows you to view all posts made by this member. Note that you can only see posts made in areas you currently have access to.

Show posts Menu

Messages - llm

#1
And wouldn't an if-less vec_transform be faster?

Or is a failed branch prediction not a problem on older CPUs, and is multiplication much slower there?

    void vec_transform(const VECTOR* src, const MATRIX* mat, VECTOR* dst)
    {
        dst->x = ((int32_t)mat->m11 * src->x + (int32_t)mat->m12 * src->y + (int32_t)mat->m13 * src->z) >> SCALE_BITS;
        dst->y = ((int32_t)mat->m21 * src->x + (int32_t)mat->m22 * src->y + (int32_t)mat->m23 * src->z) >> SCALE_BITS;
        dst->z = ((int32_t)mat->m31 * src->x + (int32_t)mat->m32 * src->y + (int32_t)mat->m33 * src->z) >> SCALE_BITS;
    }
#2
Cleaned-up version of the math.h usage, with some tests:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#define _USE_MATH_DEFINES
#include <math.h>
#include "stunts_math.hpp"

namespace math_h_test
{
// Fixed-point format used by the helpers below: SCALE_BITS fractional bits,
// so SCALE (1 << 14 = 16384) represents 1.0.
#define SCALE_BITS 14
#define SCALE (1 << SCALE_BITS)

// Angle units: 0x400 steps = 2Pi, i.e. 512 steps = Pi.
#define ANGLE_TO_RAD(a) ((a) * (M_PI / 512.0))
#define RAD_TO_ANGLE(r) ((r) * (512.0 / M_PI))

// TO_FIXED scales a double by SCALE, rounds it with lround() and casts the
// result to int16_t; FROM_FIXED converts a fixed-point value back to double.
#define TO_FIXED(x)   ((int16_t)lround((x) * SCALE))
#define FROM_FIXED(x) ((double)(x) / SCALE)

    int16_t int_sin_math(uint16_t angle)
    {
        return TO_FIXED(sin(ANGLE_TO_RAD(angle)));
    }

    int16_t int_cos_math(uint16_t angle)
    {
        return TO_FIXED(cos(ANGLE_TO_RAD(angle)));
    }

    int16_t int_atan2_math(int16_t x, int16_t y)
    {
        double ang = atan2((double)x, (double)y);
        return (int16_t)lround(RAD_TO_ANGLE(ang));
    }

    int16_t int_hypot_math(int16_t x, int16_t y)
    {
        return TO_FIXED(hypot(FROM_FIXED(x), FROM_FIXED(y)));
    }

    int16_t int_hypot_3d_math(const VECTOR* v)
    {
        double dx = FROM_FIXED(v->x);
        double dy = FROM_FIXED(v->y);
        double dz = FROM_FIXED(v->z);
        return TO_FIXED(sqrt(dx * dx + dy * dy + dz * dz));
    }

    void test_sin_cos(void)
    {
        printf("=== SIN/COS compare ===\n");
        int max_diff_sin = 0, max_diff_cos = 0;

        for (int a = -0x400; a <= 0x400; ++a) {
            int16_t ref_sin = int_sin((uint16_t)a);
            int16_t ref_cos = int_cos((uint16_t)a);
            int16_t new_sin = int_sin_math((uint16_t)a);
            int16_t new_cos = int_cos_math((uint16_t)a);

            int diff_sin = abs(ref_sin - new_sin);
            assert(diff_sin == 0);

            int diff_cos = abs(ref_cos - new_cos);
            assert(diff_cos == 0);
        }
    }

    void test_atan2(void)
    {
        printf("=== ATAN2 Compare ===\n");
        int max_diff = 0;

        for (int y = -0x400; y <= 0x400; y += 64) {
            for (int x = -0x400; x <= 0x400; x += 64) {
                int16_t ref = int_atan2(x, y);
                int16_t newv = int_atan2_math(x, y);
                int diff = abs(ref - newv);

                assert(diff <= 1);
            }
        }
    }

    void test_hypot(void)
    {
        printf("=== HYPOT Compare ===\n");
        int max_diff = 0;

        for (int y = -0x400; y <= 0x400; y += 64) {
            for (int x = -0x400; x <= 0x400; x += 64) {
                int16_t ref = int_hypot(x, y);
                int16_t newv = int_hypot_math(x, y);
                int diff = abs(ref - newv);

                assert(diff <= 3);

                if (diff > 1)
                    printf("hypot(%4d,%4d): ref=%5d new=%5d diff=%3d\n",
                        x, y, ref, newv, diff);
            }
        }
    }

    void test_hypot3d(void)
    {
        printf("=== HYPOT_3D Compare ===\n");
        int max_diff = 0;
        VECTOR v;

        for (int z = -0x200; z <= 0x200; z += 64)
            for (int y = -0x200; y <= 0x200; y += 64)
                for (int x = -0x200; x <= 0x200; x += 64) {
                    v.x = x; v.y = y; v.z = z;
                    int16_t ref = int_hypot_3d(&v);
                    int16_t newv = int_hypot_3d_math(&v);
                    int diff = abs(ref - newv);

                    assert(diff <= 4);

                    if (diff > 1)
                        printf("hypot3d(%4d,%4d,%4d): ref=%5d new=%5d diff=%3d\n",
                            x, y, z, ref, newv, diff);
                }
    }

    int main(void)
    {
        test_sin_cos();
        test_atan2();
        test_hypot();
        test_hypot3d();
        return 0;
    }
}
#3
@dstien

are you interested in generators for the "magic" value tables?

creates the values for "Quarter-wave lookup table for sin_fast()"

    int16_t sine_table2[SIN_STEPS + 1];
    for (int i = 0; i <= SIN_STEPS; ++i) { // +1 for last 90 degree
        double angle = (M_PI / 2.0) * i / SIN_STEPS; // 0 .. Pi/2
        int16_t val = (int16_t)round(SIN_SCALE * sin(angle));
        sine_table2[i] = val;
    }

creates the values for "One-octant (45°) lookup table of the arctangent in 0x100 steps + the peak"

#define ATAN_STEPS 0x100      // 256 steps
// Maps radians 0..Pi/4 --> 0..128. The expansion is fully parenthesized so
// the macro keeps its value inside any expression (e.g. `1.0 / ATAN_SCALE`).
#define ATAN_SCALE (128.0 / (M_PI / 4.0))

    std::vector<uint8_t> atan_table2(ATAN_STEPS + 1);
    for (int i = 0; i <= ATAN_STEPS; ++i) {
        double ratio = (double)i / ATAN_STEPS;       // x/y ratio, 0..1
        double angle = atan(ratio);                  // radians, 0..Pi/4
        uint8_t val = (uint8_t)round(angle * ATAN_SCALE);
        atan_table2[i] = val;
    }

and math.h based implementations that scale to the integer range of the original routines

    int16_t int_atan2(int16_t x, int16_t y)
    {
        if (x == 0 && y == 0) return 0;
        double ang = atan2((double)x, (double)y);
        int16_t v = round(ang * 512.0 / M_PI);
        if (v > 512) v = 512;
        if (v < -511) v = -511;
        return v;
    }

and

    int16_t int_sin(uint16_t angle) {
        double radians = angle * M_PI / 512.0;  // 10-Bit angle -> 0..2Pi
        return (int16_t)(sin(radians) * SCALE + 0.5);   // rounded
    }

with a maximum error of 1 (the result is still more accurate mathematically, but it is not bit-exact with how Stunts implements it)
#4
@dstien

Do you test/compare the 16-bit and 32-bit builds (with wcl386 on DOS, or even gcc/clang on Linux) of the pure C function tests?
Just to make sure that they give the same results.

I'm currently doing wcl/wcl386 builds for DOS, clang 32/64-bit and MSVC 32/64-bit builds for Windows, and gcc/clang builds on Linux for my simple test - your switch to fixed-width integers makes it easy to build the code everywhere, and I'm also running clang-tidy on it :)

#5
@Matei

CAS has explained his views on third-party code several times over the years: he doesn't like (in some way even hates) third-party dependencies and the deployment/dependency "burden" - he prefers tiny single-exe builds without dependencies, like in the old times 40 years ago - and FreeBasic gives him an environment where he can get that and still be productive. It's just the way he personally wants to code: he wants to do it in a Basic language, and he wants the FreeBasic graphics interface - even if it's nearly the same as SDL (it just needs a tiny fraction fewer functions to get running).

It's clear to everyone that using third-party libraries, installing and deploying them, is as normal in the professional software development business as drinking water. Writing non-trivial multi-platform applications without third-party libraries just doesn't work anymore - you can do it, but no one will pay you for re-inventing wheels and spending endless time on it.

Maybe CAS doesn't work in professional software development, or he works in a very small (lucky) niche where he could avoid using third-party libraries for a very long time, or he chose that way for his hobby time - he chooses FreeBasic because it fully fits his needs out of the box.

It makes no sense to argue about it - he chose it for reasons that we don't see as problematic as he does.

but still, he creates nice tools for the community :-*
#6
what i can see so far:

ax seems to be left undefined by the asm routine when both x and y are 0

   
    // Experiment: call int_atan2_asm with both stack arguments set to 0 while
    // AX holds a marker value, to see what is left in AX after the call.
    int16_t a;
    // The asm block pushes two zero words as the arguments, preloads AX with
    // 1234, calls the routine, removes the 4 bytes of arguments from the stack
    // again and copies AX into `a`.
    __asm{
        mov ax,0
        push ax
        push ax
        mov ax,1234
        call int_atan2_asm
        add sp,4
        mov a,ax
    }
    // Prints the value that was in AX when the call returned.
    printf("a:%d\n", a);
    return 0;

returns 1234

So the question is: does Stunts rely on this "random" behavior? And if so, how can it be reproduced in C?

- bx is not saved (could that be a problem in combination with calls from C code?)

#7
test

    // Loop bounds for the experiment; `max` is exclusive in the loops below,
    // so the values -2..1 are covered.
    const int16_t min = -2;
    const int16_t max = +2;
   
    // Part 1: call both implementations directly with x=0, y=0 several times.
    // NOTE: the order in which the two calls inside the printf argument list
    // are evaluated is not specified by the language.
    for(int i = min; i < max; ++i)
    {
      printf("[%d](x=0,y=0) a: %d, c: %d\n", i, int_atan2_asm(0, 0),int_atan2(0, 0));
    }

    // Part 2: compare both implementations for every x/y pair in the range,
    // calling the asm routine first and the C implementation second.
    for (int16_t x = min; x < max; ++x)
    {
        for (int16_t y = min; y < max; ++y)
        {
            int16_t a = int_atan2_asm(x, y);
            int16_t c = int_atan2(x, y);
            printf("x=%d, y=%d -> C=%d, ASM=%d\n", x, y, c, a);
        }
    }

result

[-2](x=0,y=0) a: 0, c: 0
[-1](x=0,y=0) a: 0, c: 0
[0](x=0,y=0) a: 0, c: 0
[1](x=0,y=0) a: 0, c: 0
x=-2, y=-2 -> C=-384, ASM=-384
x=-2, y=-1 -> C=-332, ASM=-332
x=-2, y=0 -> C=-256, ASM=-256
x=-2, y=1 -> C=-180, ASM=-180
x=-1, y=-2 -> C=-436, ASM=-436
x=-1, y=-1 -> C=-384, ASM=-384
x=-1, y=0 -> C=-256, ASM=-256
x=-1, y=1 -> C=-128, ASM=-128
x=0, y=-2 -> C=512, ASM=512
x=0, y=-1 -> C=512, ASM=512
x=0, y=0 -> C=0, ASM=28 <======= ???
x=0, y=1 -> C=0, ASM=0
x=1, y=-2 -> C=436, ASM=436
x=1, y=-1 -> C=384, ASM=384
x=1, y=0 -> C=256, ASM=256
x=1, y=1 -> C=128, ASM=128

The asm code also returns 0 if I swap the calls (first C, then asm), and the asm result changes if I disable the C call completely:

            // Swapped order: the C implementation is called before the asm routine.
            int16_t c = int_atan2(x, y);
            int16_t a = int_atan2_asm(x, y);

Maybe I'm calling the asm code the wrong way, but it only fails for 1 out of thousands of calls?
#8
Quote from: dstien on October 09, 2025, 07:44:31 PM
Quotecompile with current Watcom V2 on Windows for DOS 16bit Model small exe using wcl+wasm
the test run was done on a real DOS 6.22 on VMWare Player (because dosbox failed to run the complete test without hanging)

Does this include the fix I pushed the other day? Strangely I can't reproduce it on my end with dosemu2 or DOSBox. Could you attach the exe file? I'm keen to have a look.

No, it didn't include the fix - with the fix I only get one difference on DOS and on XP/Win7 (32-bit) running in VMware:

on x=0, y=0  the C code returns 0, asm returns 512

(building the C code for 32bit windows results also in 0 for x=0, y=0 )

But that only happens in the x/y loop, not when calling the routines directly with 0/0 - the asm result for 0/0 seems to be non-zero only inside the x/y loop?

FYI: comparing all values from the C code for x/y in -1000...1000 shows no difference between the 16-bit and 32-bit builds.

Quotepeak values crashes both implementations

for me the testrun just takes ages on dosbox

my Test:

This is my free-standing test (I've copied the asm and C code into the test - quick & dirty):

https://filebin.net/gw4diq3oa6l2gas2/restunts2_tests.zip

for testing speed:

I'm using DOS 6.22 in VMware Player because it is very much faster than DOSBox - just install DOS in VMware Player and share files using a floppy image; maybe DOS in qemu+KVM is similarly fast, or just use a 32-bit Windows OS...

To create a floppy image on Linux:

# Create a new FAT-formatted image file; 1440 is the block count (a 1.44 MB floppy)
mkfs.msdos -C myfloppy.img 1440
# Copy the test executable into the root directory of the image (mtools)
mcopy -i myfloppy.img test16.exe ::/
# List the root directory of the image to check the copy
mdir -i myfloppy.img ::/

#9
Quote from: Daniel3D on October 08, 2025, 09:44:09 AMStill, the fact remains that IDA is a pain in the but, especially for those that don't use it enough to warrant a paid licence. So whatever way you look at it, the Ghidra conversion makes a huge difference.

100% ACK
#10
@dstien

https://github.com/dstien/restunts2/commit/daa05e093c3e56bfa6a7a9610173dc5db2db8cfe

Will you also try to compare against the original implementation - something like the int_atan2 disassembly vs. the C port over the complete integer range - or is something like that too much?

for example: some x/y combinations don't give the same result

Range for x/y: -1000...1000 - the list contains only the differing results between the C port and the original disassembled function

https://pastebin.com/f9nucieg

My test code looks something like this:

   
    const int16_t min = -1000;
    const int16_t max = +1000;

    for (int16_t x = min; x < max; ++x)
    {
        for (int16_t y = min; y < max; ++y)
        {
            int16_t a = int_atan2_asm(x, y);
       
            int16_t c = int_atan2(x, y);
           
            if(c != a)
            {
              printf("x=%d, y=%d -> C=%d, ASM=%d %s\n",
                     x, y, c, a, (c == a) ? "OK" : "!!FEHLER!!");
            }
        }
    }

Compiled with the current Watcom V2 on Windows as a 16-bit DOS small-model exe using wcl+wasm.
The test run was done on a real DOS 6.22 in VMware Player (because DOSBox failed to run the complete test without hanging).

But I'm not sure what the defined input range for these functions is.

#11
QuoteParticularly the database from the program whose vendor has a history of refusing to sell to people outside the infosec clique.

Isn't that like the myth that IDA dropped DOS support (which is only true for the IDA freeware versions)?

I know several private individuals who bought an IDA license - like me - without any infosec background.

#13
Creating the longest possible Ski Jump in The Games: Winter Challenge (reversing, tool development)

https://mrwint.github.io/winter/writeup/writeup2.html
#14
Stunts Related Programs / Re: Track viewer for DOS
August 13, 2025, 10:36:32 AM
QuoteGithub belongs to Micro$oft, besides being hard to use. I don't know about gitlab, but I'm tired of these.

I need to use all of them in my work life - Linux, Windows, Mac, GitLab, GitHub, SourceForge, svn, git, Perforce, etc., and every other development tool needed, from the console and make tools to OSes and all available compilers and IDEs - so I stopped being picky and instead try to become familiar with every one of them. So far that has never failed me, and I can use every tool at every one of my customers without problems - but that's only my story :)
#15
QuoteI still have the built exes from HerrNove and I never compiled the branch myself. I was referring to compiling my game.

He got that wrong - and so did you :)