News:

Herr Otto Partz says you're all nothing but pipsqueaks!

Main Menu
Menu

Show posts

This section allows you to view all posts made by this member. Note that you can only see posts made in areas you currently have access to.

Show posts Menu

Messages - llm

#16
and something for you to learn - how the cdecl stack handling for function calls works:

seg016:0008                 push    [bp+arg_4] ; 2 byte push - parameter 2
seg016:000B                 push    [bp+arg_2] ; 2 byte push - parameter 1
seg016:000E                 push    [bp+arg_0] ; 2 byte push - parameter 0
seg016:0011                 call    sub_30F9D
seg016:0016                 add     sp, 6 ; 3*2

the add sp,6 after the call means that the caller removes 6 bytes from the stack (where the parameters of sub_30F9D were lying)
- so sub_30F9D is very likely a cdecl function, because with cdecl the caller has to clean up the stack - and 3 pushes = 3 parameters
the 6 bytes come from the 3 preceding pushes of 2 bytes each

this 80(1)86 code only allows 2 byte pushes onto the stack - so even bytes are pushed as words
but there are also 32bit values (for example a far-ptr with segment+offset) that are pushed in two parts
in C this is for example "void test(int far* value)" -> segment and offset go onto the stack as 2 pushes
#17
such a function

seg016:0002 locate_many_resources proc far          ; CODE XREF: load_intro_resources+2A↑P
seg016:0002                                        ; run_opponent_menu+4A↑P
seg016:0002                                        ; load_skybox+60↑P
seg016:0002                                        ; load_sdgame2_shapes+2C↑P
seg016:0002                                        ; setup_intro+2E↑P
seg016:0002                                        ; setup_car_shapes+9C↑P
seg016:0002                                        ; setup_car_shapes+B4↑P
seg016:0002                                        ; setup_car_shapes+D3↑P
seg016:0002                                        ; loop_game+34↑P
seg016:0002                                        ; load_tracks_menu_shapes:loc_2A2E3↑P
seg016:0002                                        ; load_tracks_menu_shapes:loc_2A2F9↑P
seg016:0002                                        ; load_tracks_menu_shapes+53↑P
seg016:0002
seg016:0002 arg_0          = word ptr  6
seg016:0002 arg_2          = word ptr  8
seg016:0002 arg_4          = word ptr  0Ah
seg016:0002 arg_6          = word ptr  0Ch
seg016:0002
seg016:0002                push    bp
seg016:0003
seg016:0003 loc_367B3:
seg016:0003                mov    bp, sp
seg016:0005
seg016:0005 loc_367B5:
seg016:0005                jmp    short loc_367D9
seg016:0005 ; ---------------------------------------------------------------------------
seg016:0007                align 2
seg016:0008
seg016:0008 loc_367B8:                              ; CODE XREF: locate_many_resources+2D↓j
seg016:0008                push    [bp+arg_4]
seg016:000B
seg016:000B loc_367BB:
seg016:000B                push    [bp+arg_2]
seg016:000E
seg016:000E loc_367BE:
seg016:000E                push    [bp+arg_0]
seg016:0011
seg016:0011 loc_367C1:
seg016:0011                call    locate_shape_fatal
seg016:0016
seg016:0016 loc_367C6:
seg016:0016                add    sp, 6
seg016:0019
seg016:0019 loc_367C9:
seg016:0019                mov    bx, [bp+arg_6]
seg016:001C
seg016:001C loc_367CC:
seg016:001C                add    [bp+arg_6], 4
seg016:0020
seg016:0020 loc_367D0:
seg016:0020                mov    [bx], ax
seg016:0022                mov    [bx+2], dx
seg016:0025                add    [bp+arg_4], 4
seg016:0029
seg016:0029 loc_367D9:                              ; CODE XREF: locate_many_resources:loc_367B5↑j
seg016:0029                mov    bx, [bp+arg_4]
seg016:002C
seg016:002C loc_367DC:
seg016:002C                cmp    byte ptr [bx], 0
seg016:002F                jnz    short loc_367B8
seg016:0031                pop    bp
seg016:0032                retf
seg016:0032 locate_many_resources endp

most of the inner labels are completely unused

fresh IDA import

seg016:0002 sub_367B2       proc far                ; CODE XREF: sub_10786+2A↑P
seg016:0002                                         ; sub_1293C+4A↑P ...
seg016:0002
seg016:0002 arg_0           = word ptr  6
seg016:0002 arg_2           = word ptr  8
seg016:0002 arg_4           = word ptr  0Ah
seg016:0002 arg_6           = word ptr  0Ch
seg016:0002
seg016:0002                 push    bp
seg016:0003                 mov     bp, sp
seg016:0005                 jmp     short loc_367D9
seg016:0005 ; ---------------------------------------------------------------------------
seg016:0007                 nop
seg016:0008
seg016:0008 loc_367B8:                              ; CODE XREF: sub_367B2+2D↓j
seg016:0008                 push    [bp+arg_4]
seg016:000B                 push    [bp+arg_2]
seg016:000E                 push    [bp+arg_0]
seg016:0011                 call    sub_30F9D
seg016:0016                 add     sp, 6
seg016:0019                 mov     bx, [bp+arg_6]
seg016:001C                 add     [bp+arg_6], 4
seg016:0020                 mov     [bx], ax
seg016:0022                 mov     [bx+2], dx
seg016:0025                 add     [bp+arg_4], 4
seg016:0029
seg016:0029 loc_367D9:                              ; CODE XREF: sub_367B2+3↑j
seg016:0029                 mov     bx, [bp+arg_4]
seg016:002C                 cmp     byte ptr [bx], 0
seg016:002F                 jnz     short loc_367B8
seg016:0031                 pop     bp
seg016:0032                 retf
seg016:0032 sub_367B2       endp
#18
Quote from: Daniel3D on October 16, 2022, 06:23:19 PMCan it be that the ida has mistaken them for labels and that they are just values?

normally not - i also can't find any code that uses those labels - they are just there...

Quote from: Daniel3D on October 16, 2022, 06:23:19 PMI don't know how much the ida has evolved since the first disassembly. Also from what I've read about the process I have a feeling that you have a bit more experience with this. So maybe your settings create a cleaner result..

even an old version of IDA doesn't create these labels, strange - but they are not everywhere, only in some functions

Quote from: Daniel3D on October 16, 2022, 06:23:19 PMThat would be unfortunate because that would mean that it is smart to redo the entire process. And there has been done a lot of research and analyzing that has to be copied and checked.

IDA is able to store the analysis results as a script - everything in IDA is script-based - for reproducibility
sadly it doesn't work very well for downgrading to the freeware version :(

another thing that i've found is that very few functions are typed - except for the 3d engine everything in stunts is C based, so every function coming from C follows the cdecl calling convention (https://en.wikibooks.org/wiki/X86_Disassembly/Calling_Conventions#CDECL): the parameters are pushed onto the stack in reverse order - very well defined

normally you start very early to annotate the functions in the disassembly with IDA as cleanly defined cdecl functions - that helps IDA to infer more about the code and spread type info over the code
it seems that we started with that but never did it for most of the functions - that makes the code
harder to read - it would be a big win to annotate the C-functions properly, and even for non cdecl functions there is the __usercall (https://www.hex-rays.com/products/ida/support/idadoc/1361.shtml) feature of IDA that allows you to annotate registers etc. as parameters to describe the "interface" of a pure-assembler function better

my goal is to write a simple IDA script that contains all functions + names + signatures
and global structs and their usage, to feed it to IDA at a very early stage of the analysis, so IDA can infer more
maybe it's also possible to use this script variant with Ghidra or the freeware version of IDA - to make it easier to play with the information in open source or freeware tools

the cdecl information would be enough for me to trace every cdecl call from the game in my dosbox extension - it's formalized enough that
i just need the signature and then i'm able to print what the parameter contents and return values are - that sometimes helps to understand better
what the code is doing (a trace of every function call)
#19
Quote from: Daniel3D on October 16, 2022, 03:28:12 PMThank you. That really makes it clearer. I kind of deducted the functionality but this is a lot more detailed.

the more you understand the better...

Quote from: Daniel3D on October 16, 2022, 03:28:12 PMMy guess is that if the non symbolic offsets are fixed and the para alignment (do i say that correctly? You know what I mean) is done. Then it may be very easy to expand the horizons.

it would reduce problems alot

i'm currently a little bit confused about the current state of some functions in the asmorig - some of the functions you've shown me are full of unused labels, cluttering the asm code a little
these labels do not exist if i freshly analyze the current game exe with IDA - i need to find out what these labels are for
#20
Quote from: Daniel3D on October 15, 2022, 10:44:16 PM*Learned that form CAS.  8)

yes correct:

seg003:38BC                mov    al, [bp+arg_0] <-- al = arg0
seg003:38BF                mov    byte_46167, al
seg003:38C2                mov    byte_3B8F6, 1
seg003:38C7                cbw    <== ax = sign-extended(al)
seg003:38C8                mov    cx, ax
seg003:38CA                shl    ax, 1
seg003:38CC                shl    ax, 1
seg003:38CE                shl    ax, 1
seg003:38D0                add    ax, cx
seg003:38D2                add    ax, offset aDesert ; "desert"
seg003:38D5                push    ax <-- first parameter of file_load_shape2d_fatal_thunk
seg003:38D6                call    file_load_shape2d_fatal_thunk

CBW: https://c9x.me/x86/html/file_module_x86_id_27.html

its ax = 9 * cbw(arg0) + offset aDesert

so in C that would be like

&aDesert[arg0*9]   (the address of that element is pushed, not the character itself)

and aDesert could be just the first element - having nothing to do with desert specifically

or some other strange way to address an array or member inside of aDesert

and the "9" is the max size of the string

dseg:0140 aDefault        db 'DEFAULT',0
dseg:0148                db    0
dseg:0149                db    0

==> table with 5, 8+1 byte strings
dseg:014A aDesert        db 'desert',0,0,0      ; DATA XREF: sub_1D7A2+40↑o
dseg:0153 aTropical      db 'tropical',0
dseg:015C aAlpine        db 'alpine',0,0,0
dseg:0165 aCity              db 'city',0,0,0,0,0
dseg:016E aCountry        db 'country',0,0

so in C that would be "char background[5][9]" (5 entries of 9 bytes each) and arg0 is then 0-4

dseg:0177                db    0
dseg:0178                db    0
dseg:0179                db    0

in C++ that would be exactly (and 100% binary equal)

using background_name_t = char[9];
const background_name_t background_names[5] // the missing trailing 0s are added implicitly: each string literal is 0-terminated and the rest of every char[9] entry is zero-filled
{
 "desert",
 "tropical",
 "alpine",
 "city",
 "country"
};

so C/C++ knows that every entry in background_names is a 9 byte string - so
the arithmetic of multiplying by 9 is done implicitly - based on the type definition

a ptr to background_names is equal to the address of its first entry, "desert"
that's the reason IDA thinks the code offsets aDesert directly
but the code just refers to the whole table

and then just

file_load_shape2d_fatal_thunk(background_names[arg0]);

the same as

ax = 9 * cbw(arg0) + offset aDesert
push ax
call file_load_shape2d_fatal_thunk

as you can see the reduction in complexity is big when comparing C with asm :)

compiling this C/C++ code with the original Stunts 16bit compiler "Microsoft C 5.1" reveals this code

#include <string.h>

typedef char background_name_t[9];

const background_name_t background_names[5] =
{
 "desert",
 "tropical",
 "alpine",
 "city",
 "country"
};

int main(int argc, char* argv[])
{
 return strlen(background_names[argc]);
}

the generated assembler code for this small snippet looks very much like the original code (or can be tuned to look exactly the same)

seg000:0010 ; int __cdecl main(int argc, const char **argv, const char **envp)
seg000:0010 _main          proc near              ; CODE XREF: start+8D↓p
seg000:0010
seg000:0010 arg_0          = word ptr  4
seg000:0010
seg000:0010                push    bp
seg000:0011                mov    bp, sp
seg000:0013                xor    ax, ax
seg000:0015                call    __chkstk

here is your original assembler code (ignoring cbw) as the result of compiling my C/C++ code
seg000:0018                mov    ax, [bp+arg_0]
seg000:001B                mov    cx, ax
seg000:001D                shl    ax, 1
seg000:001F                shl    ax, 1
seg000:0021                shl    ax, 1
seg000:0023                add    ax, cx
seg000:0025                add    ax, offset aDesert ; "desert"
seg000:0028                push    ax              ; char *

seg000:0029                call    _strlen
seg000:002C                add    sp, 2
seg000:002F                pop    bp
seg000:0030                retn
seg000:0030 _main          endp

also the data-segment part of the background tables is 100% binary identical

dseg:003C                db  43h ; C
dseg:003D                db  6Fh ; o
dseg:003E                db  72h ; r
dseg:003F                db  70h ; p
dseg:0040                db  11h
dseg:0041                db    0
dseg:0042 aDesert        db 'desert',0          ; DATA XREF: _main+15↑o
dseg:0049                db    0
dseg:004A                db    0
dseg:004B                db  74h ; t
dseg:004C                db  72h ; r
dseg:004D                db  6Fh ; o
dseg:004E                db  70h ; p
dseg:004F                db  69h ; i
dseg:0050                db  63h ; c
dseg:0051                db  61h ; a
dseg:0052                db  6Ch ; l
dseg:0053                db    0
dseg:0054                db  61h ; a
dseg:0055                db  6Ch ; l
dseg:0056                db  70h ; p
dseg:0057                db  69h ; i
dseg:0058                db  6Eh ; n
dseg:0059                db  65h ; e
dseg:005A                db    0
dseg:005B                db    0
dseg:005C                db    0
dseg:005D                db  63h ; c
dseg:005E                db  69h ; i
dseg:005F                db  74h ; t
dseg:0060                db  79h ; y
dseg:0061                db    0
dseg:0062                db    0
dseg:0063                db    0
dseg:0064                db    0
dseg:0065                db    0
dseg:0066                db  63h ; c
dseg:0067                db  6Fh ; o
dseg:0068                db  75h ; u
dseg:0069                db  6Eh ; n
dseg:006A                db  74h ; t
dseg:006B                db  72h ; r
dseg:006C                db  79h ; y
dseg:006D                db    0
dseg:006E                db    0
dseg:006F                db    0
dseg:0070 word_105D0      dw 0                    ; DATA XREF: start+4A↑w
#21
Quote from: llm on October 14, 2022, 01:56:05 PMThere is a function that loads horizons. That function gets its filenames from Dseg.

how is that function called? be always precise :)
#22
Quote from: Daniel3D on October 14, 2022, 12:49:10 PMThere is a function that loads horizons. That function gets its filenames from Dseg.
You can not add a name there because it will create an offset and make all non-symbolic functions in the whole code fail. If we fix that, we can easily (probably not ::) ) create new horizons.
it's a trivial, unimportant change, but a nice small project to make and to kinda test stability.

yes, but you could add it to the end of the data-segment, and move the stack-segment a little for example
- the stack location is only needed in a very early stage of the game while initializing, and this offset is already symbolic; there are some options that do not corrupt the offsets, but fully symbolic is always the best we can have


#23
Quote from: Daniel3D on October 14, 2022, 12:42:16 PMAnd it does not matter where Func0 is. If the new code is before the location that Func0 is looking for it fails.

yes 100% correct - it's not called "fails", but "undefined behavior"
it's not clear what happens when the value gets read from the wrong offset - nearly everything is possible - like a random-problem-generator: it could be that there is always 0 and the correct code always wanted 0, or there is an ever changing value that most of the time is in a range that the function can work with, producing no visual or audio glitches, or maybe some strange physics behavior while driving in a special way
#24
Quote from: Daniel3D on October 14, 2022, 12:16:40 PM
Quote from: llm on October 14, 2022, 09:10:47 AMwhat happens if offsets are not symbolic?

Code Select Expand
0x3440 func0
0x3440  mov ax,0x3456
0x3442  call XYZ
0x3448
0x3450 func1
0x3451   some code <-- the above non-symbolic offset will get wrong if you add/remove code here
0x3452 added code
0x3453 added code
0x3454 added code
0x3455
0x3456: Something entirely different (not: dw some_value 234)
0x3457
0x3458
0x3459: dw some_value 234
Like this?
Then func0 fails. I know. That is why getting rid of them is important.

yes 100% correct - but "fails" isn't defined here - it could be that the algorithm still works because it's just not that robust, or there is an identical or nearly identical value at the target offset
think of values like 0, 255, -1 or something - those are very common, so it "could" still work
#25
again, for your daily training :)

what happens if offsets are not symbolic?

0x3440 func0
0x3440  mov ax,0x3456
0x3442  call XYZ
0x3448
0x3450 func1
0x3451   some code <-- the above non-symbolic offset will get wrong if you add/remove code here
0x3452
0x3453
0x3454
0x3455
0x3456: dw some_value 234

func0
  mov ax,offset some_value
  call XYZ

func1
  some code <-- the above symbolic offset will not get wrong if you add/remove code here

dw some_value 234
#26
nice findings - i will have a look and check which of these are real offsets - but i think at least 50% are very likely offsets
#27
Quote from: Daniel3D on October 11, 2022, 01:03:15 PMThis should be one to.
Lucky find on my phone..

no thats the DOS-API (int 21h, function=ah=4Ch=exit program, with error=al=0FFh result == -1)
http://www.osfree.org/doku/en:docs:dos:api:int21:4c

could be written as

mov ah,4Ch
mov al,0FFh ; -1
int 21h

or

mov ax,4CFFh
int 21h

you always need to analyse the context around a little - everything in assembler is more or less global, typeless (pointer, value, ... everything is possible)

C port of that is

exit(-1);
#28
Quote from: Daniel3D on October 11, 2022, 12:38:31 PMI'm not starting with ida.

would be the easiest - but IDA is commercial, costs ~400$ in the home edition

i would love to go back to IDA Freeware 5 (the only free version that still supports DOS)
official download available on ScummVM homepage: https://www.scummvm.org/news/20180331/

but upgrading the IDA database (idb) is a one-way-ticket - and im currently working with 6.8

but you should install the freeware - it gives you a good idea of how that all works, even if this IDA is not the latest of the latest - most reversing projects use this freeware version (or Ghidra - which is sometimes problematic with segment/offset support)
#29
just to give you a feeling what the code does in one of your examples:

seg000:053A _ask_dos:                              ; CODE XREF: stuntsmain+43D↑j
seg000:053A  sub    ax, ax
seg000:053C  push    ax  ; show_dialog param 9
seg000:053D  push    ax  ; show_dialog param 8
seg000:053E  push    dialogarg2  ; show_dialog param 7
seg000:0542  mov    ax, 0FFFFh
seg000:0545  push    ax  ; show_dialog param 6
seg000:0546  push    ax  ; show_dialog param 5

seg000:0547      mov    ax, offset aDos ; "dos"
seg000:054A      push    ax ; locate_text_res param 3
seg000:054B      push    word ptr mainresptr+2 ; locate_text_res param 2
seg000:054F      push    word ptr mainresptr ; locate_text_res param 1
seg000:0553      call    locate_text_res
seg000:0558      add    sp, 6 -> 6 bytes removed from stack (due to the previous 3 pushes of 2 bytes each)

seg000:055B  push    dx  ; show_dialog param 4
seg000:055C  push    ax  ; show_dialog param 3
seg000:055D  mov    ax, 1
seg000:0560  push    ax  ; show_dialog param 2
seg000:0561  mov    ax, 2
seg000:0564  push    ax  ; show_dialog param 1
seg000:0565  call    show_dialog
seg000:056A  add    sp, 12h ; 12h  = 18 bytes removed from stack (due to the previous 9 pushes)

this is the C-port of that asm-code

  locate_text_res(mainresptr.offset, mainresptr.segment, "dos"); // sets dx and ax (could be a ptr)
  show_dialog(2, 1, ax, dx, -1, -1, dialogarg2, 0, 0);
#30
Quote from: Daniel3D on October 11, 2022, 10:51:24 AMThis is the last for now. Enough to test if I am finding them correctly .. And to see if it is useful..
(I will make more compact logs of others I find when useful to continue. I did it this way, so you can easily see if I make obvious mistakes)
seg000 Line 1795:     mov     ax, 0FFFEh 
    call    shape3d_load_all
    mov     ax, 0C8h ; 'È'
    push    ax
    mov     ax, 140h
    push    ax
    mov     ax, 28h ; '('
    push    ax
    push    ax
    call    set_projection
    add     sp, 8
    mov     ax, 0FFFEh
    push    ax
    call    init_game_state
    add     sp, 2
    call    sprite_copy_wnd_to_1
    push    skybox_grd_color
    call    sprite_clear_1_color
if this is one there are 3 other hits on "ax, 0FFFEh"


0FFFEh is not a valid looking offset - just too big, and 0FFFEh as signed is -2 - so it's maybe some sort
of parameter or really the value 65534

you need to understand hex/dec, signed/unsigned and type-size very well to get a "feeling" for what that number could be - combined with knowledge about the called functions