For the past few days I have been working on a way to make add-ins compatible with SH4 (Power Graphic 2) calculators without having their sources, and I'm here to share the first results with you!
We currently know of several problems that make add-ins incompatible with SH4 calculators. Each of them will need to be resolved, one at a time; here is the current state:
- The
IsKeyDown function from FxLib :
Problem: The IsKeyDown function currently reads the keyboard input directly. With Power Graphic 2 the CPU changed, and the keyboard's connections to the CPU changed with it, so the IsKeyDown function cannot work with the new CPU.
Solution: As Casimo showed us with his solution in C, Power Graphic 2 calculators have a specific keyboard register that can be read easily. The solution is simply to replace the assembly code of the IsKeyDown function with new code that jumps to a compatible function, which I add at the end of the file (only the header needs to be rewritten, which is easy thanks to Simon Lothar and his documentation).
So here is the code
- Code: Select all
; Patch stub that forwards a key test to _GetKeyState and returns the inverted result in r0.
; NOTE(review): this listing sits in the IsKeyDown section, but it is byte-identical to the
; IsKeyUp replacement shown later in this post (same label, slowmode 2, inverted result).
; For IsKeyDown one would expect slowmode 1 and no inversion - confirm against the author's tool.
_IsKeyUpReplacement ; put this at offset 0xc of the IsKeyUp function (the first two bytes replaced are 63f0). The length of the IsKeyUp function is 0x4c
;before : a call to _KeyCodeConvert has put on the stack an array of two bytes holding respectively the column and the row
mov r15,r4 ; first param of _GetKeyState : address of the col/row array (top of the stack)
mov #2,r5 ; second param of _GetKeyState : slowmode 2 (= duration of the IsKeyUp function)
mov.l GetKeyStateAddress2,r0
jsr @r0 ;call _GetKeyState
nop ; delay slot of the jsr
nop ; presumably padding so that the literal below stays 4-byte aligned - TODO confirm
;after _GetKeyState : r0 = 1 if the key is pressed
not r0,r0 ; invert every bit of the result...
and #1,r0 ; ...and keep bit 0 only : r0 = 1 if the key is NOT pressed
add #4,r15 ; presumably drops the stack slot holding the col/row array - TODO confirm
lds.l @r15+,pr ; restore the return address saved by the untouched beginning of the function
rts
nop ; delay slot of the rts
GetKeyStateAddress2:
.data.l h'00301234 ; address of the compatible _GetKeyState function : placeholder, set this address manually after compilation
My compatible _GetKeyState: This function is compatible with both CPUs and can wait a defined time at the end to emulate the old, slower functions. It is fast (not as fast as the KeyDown function, but that one is not compatible with the new calculators), so I think that once I optimize the code it will be possible to use it in other new programs. I have only just learned assembly, though, so it can probably be done better.
- Code: Select all
;-----------------------------------------------------------------------
; _GetKeyState : read the state of one key; works on SH3 and on SH4 (Power Graphic 2)
; In:  r4 = address of an array of two unsigned char : [0] = column, [1] = row
;      r5 = slowMode : how long to wait at the end to emulate the old functions
;             -n : n loops
;              0 : as fast as possible
;              1 : duration of the IsKeyDown function
;              2 : duration of the IsKeyUp function
;              3 : duration of the KeyDown function
; Out: r0 = 1 if the key is pressed, 0 otherwise (0 as well if column/row is out of range)
; Saved and restored : pr, r1, r2, r3, r6, r7, r8, r9
; Overwritten        : r4, r5, and gbr on the SH3 path
;-----------------------------------------------------------------------
_GetKeyState
    ; Save the current values on the stack
    sts.l   pr,@-r15
    mov.l   r1,@-r15
    mov.l   r2,@-r15
    mov.l   r3,@-r15
    mov.l   r6,@-r15
    mov.l   r7,@-r15
    mov.l   r8,@-r15
    mov.l   r9,@-r15
    mov     r4,r8                   ; r8 = first param (address of the col/row array)
    mov     r5,r9                   ; r9 = second param (slowMode)

    ; Get the OS version with syscall 0x0015.
    ; Every output gets a full 4-byte stack slot : with 1-byte slots for the chars and
    ; 2-byte slots for the shorts it crashed on every calculator (but not on the emulator).
    ; The types noted below stay valid because the syscall only writes the correct number of bytes.
    add     #-4,r15                 ; main version : unsigned char
    mov     r15,r4
    add     #-4,r15                 ; minor version : unsigned char
    mov     r15,r5
    add     #-4,r15                 ; release : unsigned short
    mov     r15,r6
    add     #-4,r15                 ; build : unsigned short
    mov     r15,r7
    ; call the syscall
    mov.l   #h'80010070,r2          ; syscall entry point
    jsr     @r2
    mov     #h'15,r0                ; (delay slot) syscall number

    ; r6 = (main version << 8) + minor version; the four slots are released on the way
    add     #8,r15                  ; skip build and release
    mov.b   @r15,r6                 ; minor version
    add     #4,r15
    mov.b   @r15,r0                 ; main version
    add     #4,r15
    shll8   r0                      ; r0 = r0 << 8
    add     r0,r6
    ;reserved registers :
    ;r9 second param
    ;r6 OS version

    ; Read and check the coordinates
    mov.b   @r8,r7                  ; r7 = key's column
    mov.b   @(1,r8),r0
    mov     r0,r8                   ; r8 = key's row
    ; verify the row value : 0 <= row <= 9
    mov     #0,r0
    cmp/gt  r8,r0                   ; T = (0 > row)
    bt      NegativeEnd
    mov     #9,r1
    cmp/gt  r1,r8                   ; T = (row > 9)
    bt      NegativeEnd
    ; verify the column value : 0 <= column <= 6
    cmp/gt  r7,r0                   ; T = (0 > column)
    bt      NegativeEnd
    mov     #6,r1
    cmp/gt  r1,r7                   ; T = (column > 6)
    bt      NegativeEnd

    ; Take the SH4 path when the OS version is >= 2.02
    mov.w   #h'0202,r0
    cmp/ge  r0,r6                   ; T = (r6 >= h'0202)
    bt      SH4
    ;reserved registers :
    ;r9 second param
    ;r8 Key's row
    ;r7 Key's col

    ;---------------- SH3 part ----------------
    ; r6 = smask = 0x0003 << ((row % 8) * 2)
    mov     r8,r0                   ; row -> r0
    and     #7,r0                   ; % 8
    add     r0,r0                   ; * 2
    mov     #3,r6
    shld    r0,r6                   ; 3 <<
    ; r5 = cmask = ~(1 << (row % 8))
    mov     r8,r0                   ; row -> r0
    and     #7,r0                   ; % 8
    mov     #1,r5
    shld    r0,r5                   ; 1 <<
    not     r5,r5                   ; ~
    ;reserved registers :
    ;r9 second param
    ;r8 Key's row
    ;r7 Key's col
    ;r6 smask
    ;r5 cmask

    ; gbr = base address used for every port access below
    mov.l   #h'A4000100,r0
    ldc     r0,gbr

    ; if (row < 8)
    mov     #8,r0
    cmp/gt  r8,r0                   ; T = (8 > row)
    bf      rowCond_Else            ; row >= 8 -> else branch
    ;rowCond_begin
    ; *PORTB_CTRL = 0xAAAA ^ smask;
    mov     r6,r0
    mov.w   #h'AAAA,r1
    xor     r1,r0
    mov.w   r0,@(h'02,gbr)
    ; *PORTM_CTRL = (*PORTM_CTRL & 0xFF00) | 0x00AA;
    mov.w   @(h'18,gbr),r0          ; *PORTM_CTRL -> r0
    mov.w   #h'FF00,r1
    and     r1,r0                   ; *PORTM_CTRL & 0xFF00
    or      #h'AA,r0                ; | 0x00AA
    mov.w   r0,@(h'18,gbr)
    ; delay() - delay overwrites r0, r1 and r4
    bsr     delay
    mov     #-10,r4                 ; (delay slot) 10 loops
    ; *PORTB = cmask;
    mov     r5,r0
    mov.b   r0,@(h'22,gbr)          ; PORTB = cmask
    ; *PORTM = (*PORTM & 0xF0) | 0x0F;
    mov.b   @(h'38,gbr),r0          ; *PORTM -> r0
    and     #h'F0,r0                ; *PORTM & 0xF0
    or      #h'0F,r0                ; | 0x0F
    mov.b   r0,@(h'38,gbr)
    bra     rowCond_End
    nop
rowCond_Else:
    ; *PORTB_CTRL = 0xAAAA;
    mov.w   #h'AAAA,r0
    mov.w   r0,@(h'02,gbr)
    ; *PORTM_CTRL = ((*PORTM_CTRL & 0xFF00) | 0x00AA) ^ smask;
    mov.w   @(h'18,gbr),r0
    mov.w   #h'FF00,r1
    and     r1,r0                   ; *PORTM_CTRL & 0xFF00
    or      #h'AA,r0                ; | 0x00AA
    xor     r6,r0                   ; ^ smask
    mov.w   r0,@(h'18,gbr)          ; word store : PORTM_CTRL is read and written as 16 bits everywhere else in this routine
    ; delay()
    bsr     delay
    mov     #-10,r4                 ; (delay slot) originally 5, but the delay function got faster so more loops are needed
    ; *PORTB = 0xFF;
    mov.b   #h'ff,r0
    mov.b   r0,@(h'22,gbr)          ; PORTB = 0xFF
    ; *PORTM = (*PORTM & 0xF0) | cmask;
    mov.b   @(h'38,gbr),r0
    and     #h'F0,r0                ; *PORTM & 0xF0
    or      r5,r0                   ; | cmask
    mov.b   r0,@(h'38,gbr)
rowCond_End:
    ;reserved registers :
    ;r9 second param
    ;r8 Key's row
    ;r7 Key's col
    ; delay()
    bsr     delay
    mov     #-10,r4
    ; result = (~(*PORTA)) >> column & 1;
    mov.b   @(h'20,gbr),r0
    not     r0,r6                   ; r6 = ~r0
    neg     r7,r0                   ; r0 = -column (a negative shld count shifts right)
    shld    r0,r6                   ; r6 = r6 >> column
    mov.b   #1,r0
    and     r0,r6
    ;reserved registers :
    ;r9 second param
    ;r8 Key's row
    ;r7 Key's col
    ;r6 result
    ; delay()
    bsr     delay
    mov     #-10,r4
    ; *PORTB_CTRL = 0xAAAA;
    mov.w   #h'AAAA,r0
    mov.w   r0,@(h'02,gbr)
    ; *PORTM_CTRL = (*PORTM_CTRL & 0xFF00) | 0x00AA;
    mov.w   @(h'18,gbr),r0
    mov.w   #h'FF00,r1
    and     r1,r0                   ; *PORTM_CTRL & 0xFF00
    or      #h'AA,r0                ; | 0x00AA
    mov.w   r0,@(h'18,gbr)
    ; delay()
    bsr     delay
    mov     #-10,r4
    ; *PORTB_CTRL = 0x5555;
    mov.w   #h'5555,r0
    mov.w   r0,@(h'02,gbr)
    ; *PORTM_CTRL = (*PORTM_CTRL & 0xFF00) | 0x0055;
    mov.w   @(h'18,gbr),r0
    mov.w   #h'FF00,r1
    and     r1,r0                   ; *PORTM_CTRL & 0xFF00
    or      #h'55,r0                ; | 0x0055
    mov.w   r0,@(h'18,gbr)
    ; delay()
    bsr     delay
    mov     #-10,r4
    ; End of SH3 part
    bra     AllEnd
    nop

    ;---------------- SH4 part ----------------
SH4:
    ; Add 3 to the second param (if > 0) to select the SH4 wait time
    mov     #0,r0
    cmp/gt  r0,r9                   ; T = (r9 > 0)
    bf      negatif2ndParam
    add     #3,r9
negatif2ndParam:
    ; r1 = address of the byte of the keyboard register that holds this row :
    ; h'A44B0001 + row for an even row, h'A44B0001 + row - 2 for an odd row
    mov.l   #H'A44B0001,r1
    mov     r8,r0
    tst     #1,r0                   ; T = 1 if the row is even, else T = 0
    add     r8,r1
    bt      row_even                ; jump if T = 1
    add     #-2,r1
row_even:
    mov.b   @r1,r0                  ; the byte that contains the row data is now in r0
    mov     #1,r1
    shld    r7,r1                   ; r1 = 1 << column
    tst     r1,r0                   ; T = 0 if the key is pressed
    movt    r0
    not     r0,r0
    and     #h'1,r0                 ; r0 = 1 if the key is pressed
    mov     r0,r6
    bra     AllEnd
    nop

NegativeEnd:
    mov     #0,r6                   ; invalid coordinates : report "not pressed"
    ;reserved registers :
    ;r9 second param
    ;r8 Key's row
    ;r7 Key's col
    ;r6 result
AllEnd:
    ; Wait the right time to emulate the old functions
    bsr     delay
    mov     r9,r4                   ; (delay slot) slowMode
    ; put the result in the return register : r0
    mov     r6,r0
    ; restore the saved registers from the stack
    mov.l   @r15+,r9
    mov.l   @r15+,r8
    mov.l   @r15+,r7
    mov.l   @r15+,r6
    mov.l   @r15+,r3
    mov.l   @r15+,r2
    mov.l   @r15+,r1
    lds.l   @r15+,pr
    rts
    nop
;-----------------------------------------------------------------------
; delay : busy-wait for a defined time
; In:  r4 = slowMode : how long to wait to emulate the old functions
;             -n : n loops
;              0 : as fast as possible (equivalent to -1)
;              1 : duration of the IsKeyDown function for SH3
;              2 : duration of the IsKeyUp function for SH3
;              3 : duration of the KeyDown function for SH3
;              4 : duration of the IsKeyDown function for SH4
;              5 : duration of the IsKeyUp function for SH4
;              6 : duration of the KeyDown function for SH4
;              any other positive value is treated like 0 (it would index past the table)
; Overwrites : r0, r1, r4
;-----------------------------------------------------------------------
delay:
    ; if r4 < 0 then -r4 is the number of loops
    mov     #0,r0
    cmp/ge  r0,r4                   ; T = (r4 >= 0)
    bf      LoopNumber
    ; r4 >= 0 : it is an index into loopNumbersList; fall back to entry 0 when it is out of range
    mov     #6,r0
    cmp/gt  r0,r4                   ; T = (r4 > 6)
    bf      delay_indexInRange
    mov     #0,r4
delay_indexInRange:
    ; look up the number of loops
    shll2   r4                      ; * 4 because each table entry is 4 bytes (smaller than using MUL.L)
    mova    loopNumbersList,r0
    add     r4,r0
    mov.l   @r0,r1
    bra     target_loopBegin
    nop
    .align  4                       ; mova needs the table on a 4-byte boundary; the padding is never executed
loopNumbersList:
    .data.l h'0001                  ;fastest
    .data.l h'0001                  ;IsKeyDown SH3
    .data.l h'0001                  ;IsKeyUp SH3
    .data.l h'0001                  ;KeyDown SH3
    .data.l h'0001                  ;IsKeyDown SH4
    .data.l h'0001                  ;IsKeyUp SH4
    .data.l h'0001                  ;KeyDown SH4
LoopNumber:
    neg     r4,r1
    ; from here r1 contains the number of loops
target_loopBegin:
    dt      r1                      ; decrement r1 and set T = (r1 == 0)
    bf      target_loopBegin
    rts
    nop
Here is all test that i've done to know the speed of a function and compare
- Code: Select all
;Loop numbers tests : Number of tick taked to execute 5000 times the function
;I made some change between and after theses test so it's possible that if you do it again, you don't find same number, but the little difference will not be really significant. Because this test is on a loop of 5000 times, and mesured in tick (I'm not sure, but I remember it's egal to 1/64 seconds) and one tick is not verry significant for human.
;--------------------------------------------------------------------------------
; |Original | SH3 | SH4 |
;--------------------------------------------------------------------------------
;IsKeyDown |0xb2 |0x17(miss 155) |0x11(miss 161) |
;IsKeyDown with 0x1000 waitloop |---- |0x17b |0x1cd | : Conclusion: 0x1000 loops take 0x164 ticks to execute on SH3 => (1024/89) loops/tick,
; and 0x1bc ticks on SH4 => (1024/111) loops/tick. (I think I made a mistake somewhere: on my first try the number of loops was stored in a word (2 bytes)
; and both CPUs gave (1024/89) loops/tick, but since I changed from word to longword (4 bytes) the SH4 is slower. No idea why.)
;IsKeyDown|sh3:1783|sh4:1485 |---- |0xb2 |0xb2 |
;--------------------------------------------------------------------------------
;IsKeyUp |0x1a43 |0x17(miss 6700)|0x11(miss 6706)|
;IsKeyDown|sh3:0x12D1F|sh4:0xF1A8 |---- |0x1a42 |0x1a4a |
;--------------------------------------------------------------------------------
;KeyDown |0x9 |0x11 |0xd | : As the original function is faster than the compatible, we put the slowmode at the minimum for both cpu : 1
;--------------------------------------------------------------------------------
I've done these tests with this code
- Code: Select all
/* Benchmark driver: forever, time 5000 calls of the key function under test
   and show the elapsed tick count in hexadecimal at the top-left of the screen. */
int AddIn_main(int isAppli, unsigned short OptionNum)
{
    unsigned int startTicks;
    unsigned int elapsedTicks;
    char hexText[9];
    int call;

    for(;;)
    {
        /* RTC_GetTicks is a syscall documented in FxReverse */
        startTicks = RTC_GetTicks();

        call = 0;
        while(call < 5000)
        {
            key_down(K_EXE); /* put the function to measure here */
            call++;
        }

        elapsedTicks = RTC_GetTicks() - startTicks;
        intToHex(elapsedTicks, hexText);

        Bdisp_AllClr_DDVRAM();
        locate(1,1);
        Print((unsigned char*)hexText);
        Bdisp_PutDisp_DD();
    }

    return 1;
}
/* Write the 8 lowercase hexadecimal digits of `in` (most significant first) into
   `string`, followed by a terminating 0. `string` must have room for 9 chars.
   Each nibble is isolated BEFORE the cast to unsigned char: the previous
   `(unsigned char)in>>28` cast first and shifted afterwards, so the top digit was always '0'. */
void intToHex(unsigned int in, char* string)
{
    int digit;

    for(digit = 0; digit < 8; digit++)
    {
        /* digit 0 is bits 31..28, digit 7 is bits 3..0 */
        string[digit] = nibbleToHex((unsigned char)((in >> (28 - 4 * digit)) & 0xF));
    }
    string[8] = 0;
}
/* Return the lowercase hexadecimal character ('0'-'9', 'a'-'f') for the low
   4 bits of `in`. Values above 15 used to return an uninitialized char; they
   are now reduced to their low nibble. */
char nibbleToHex(unsigned char in)
{
    static const char digits[] = "0123456789abcdef";

    return digits[in & 0xF];
}
- The
IsKeyUp function from FxLib :
Problem: Two big, and bad, surprises! The first is that this function does not work like IsKeyDown: it uses syscall 0x24C, called "Chattering" in the fxLib.
The prototype seem to be :
- Code: Select all
/* Syscall 0x24C ("Chattering" in the fxLib). The prototype is the author's guess, not confirmed.
   coord: array of two cells, [0] = column and [1] = row of the key.
   Reported to return 1 if the key is pressed. */
int Chattering(unsigned char* coord);
where coord is an array of two cells: the first is the column and the second is the row of the key. This syscall returns 1 if the key is pressed.
Second surprise: syscalls are written in the OS code, so when Casio updates its OS it generally updates the syscalls to work on the new system (this is the case for a lot of useful syscalls). This one, however, does not seem to work on SH4 calculators: it always returns 0. So maybe Casio disabled this syscall (I know they thought about it, because if they had simply forgotten it, it would crash the calculator rather than just return 0).
Solution: In any case, I use the same fix as for the IsKeyDown function; I just add a "not" at the end.
- Code: Select all
; Patch stub for IsKeyUp : forwards the key test to _GetKeyState and returns the inverted result in r0.
_IsKeyUpReplacement ; put this at offset 0xc of the IsKeyUp function (the first two bytes replaced are 63f0). The length of the IsKeyUp function is 0x4c
;before : a call to _KeyCodeConvert has put on the stack an array of two bytes holding respectively the column and the row
mov r15,r4 ; first param of _GetKeyState : address of the col/row array (top of the stack)
mov #2,r5 ; second param of _GetKeyState : slowmode 2 (= duration of the IsKeyUp function)
mov.l GetKeyStateAddress2,r0
jsr @r0 ;call _GetKeyState
nop ; delay slot of the jsr
nop ; presumably padding so that the literal below stays 4-byte aligned - TODO confirm
;after _GetKeyState : r0 = 1 if the key is pressed
not r0,r0 ; invert every bit of the result...
and #1,r0 ; ...and keep bit 0 only : r0 = 1 if the key is NOT pressed
add #4,r15 ; presumably drops the stack slot holding the col/row array - TODO confirm
lds.l @r15+,pr ; restore the return address saved by the untouched beginning of the function
rts
nop ; delay slot of the rts
GetKeyStateAddress2:
.data.l h'00301234 ; address of the compatible _GetKeyState function : placeholder, set this address manually after compilation
- The
KeyDown function use by several addins :
Problem: This is the same problem as with the IsKeyDown function: the I/O registers changed, so the keyboard input cannot be read this way on the SH4 CPU. Identifying this function was a little harder, because it is not precompiled like the FxLib. I found two "types" of this function in the assembly code: one where the offset of the first line is equal to 0 modulo 4, and one where it is equal to 2 modulo 4. Some assembly code only works at an offset that is 0 modulo 4, like
- Code: Select all
; PC-relative longword load : the 4 bytes read must lie at an address divisible by 4
mov.l @(h'4,pc),r0
because it can only read a longword (4 bytes) at an offset that is 0 modulo 4, and the displacement given as parameter must be divisible by 4. That explains the difference.
Solution : So I replace the KeyDown by this code :
- Code: Select all
; Patch stub for KeyDown : unpacks the keycode into a col/row array on the stack,
; calls _GetKeyState with slowmode 3 and returns its result unchanged in r0.
; pr, r1 and r5 are saved and restored; r4 is overwritten.
_KeyDownReplacement ; put this at the beginning of the KeyDown function (the first four bytes replaced are 2FE6634C). The length of the KeyDown function is 0x100
;before : keycode in r4 ; keycode = (col<<4) + row
sts.l pr,@-r15
mov.l r1,@-r15
mov.l r5,@-r15
; add #-2,r15 ;r15 must always contain a number divisible by 4 (because a 4-byte longword can only be pushed at an address that is a multiple of 4)
;get the col
mov #-4,r0
mov r4,r1
shld r0,r1 ; r1 = keycode >> 4 (a negative shld count shifts right)
;get the row
mov r4,r0
and #h'f,r0 ; r0 = keycode & 0xF
; mov.b r0,@-r15
; mov.b r1,@-r15
;preparation of the array content : one longword whose top two bytes are col then row
shll8 r1 ; r1 = col << 8
add r0,r1 ; r1 = (col << 8) + row
shll16 r1 ; move col and row into the two most significant bytes
mov.l r1,@-r15 ; push the array
;prepare _GetKeyState call
mov r15,r4 ; get array address
mov #3,r5 ; set slowmode of _GetKeyState function (3 = duration of the KeyDown function)
mov.l GetKeyStateAddress3,r0
jsr @r0 ;call _GetKeyState
nop ; delay slot of the jsr
;after _GetKeyState : r0 already holds the result
add #4,r15 ; drop the array
mov.l @r15+,r5
mov.l @r15+,r1
lds.l @r15+,pr
rts
nop ; delay slot of the rts
GetKeyStateAddress3:
.data.l h'00301234 ; address of the compatible _GetKeyState function : placeholder, set this address manually after compilation
- A problem of "syscall call method" that cause the incompatibiility of monochromeLib :
OkProblem : In the methode use in the monochrom lib :
- Code: Select all
/* Original (problematic) form: SysCallCode is a NON-const array of machine code.
   The three words appear to encode the same instructions as the _SyscallFix listing
   further down (load 0x80010070, jump to it, syscall number read from the stack) -
   TODO confirm against the SH instruction encoding. */
static int SysCallCode[] = {0xD201422B,0x60F20000,0x80010070};
/* SysCall points at that array, so calling it jumps into the array itself. */
static int (*SysCall)( int R4, int R5, int R6, int R7, int FNo ) = (void*)&SysCallCode;
/* Calls syscall number 309 (0x135) with all four arguments set to 0 and returns its result as a char*. */
char* ML_vram_adress()
{
return (char*)((*SysCall)(0, 0, 0, 0, 309));
}
The array SysCallCode is written to memory (at an address above 0x0810000; I don't know what this "memory" is, but every address into it is written directly in the program, so at the beginning of the program this memory is empty). Once the array has been written to this memory, the program jumps to it (the content of the array is binary code that runs a syscall). But it seems that on SH4 we can still read and write this memory but can no longer jump into it, which is why it crashes. So an easy solution is to avoid writing the array to this memory by declaring it "const". As a const, it stays in the program's "instruction list" and is not copied anywhere else, and the program's instruction list is obviously still readable and executable.
- Code: Select all
/* Fixed form: the machine-code array is const, so (per the explanation above) it stays
   with the program code instead of being copied to the memory that SH4 cannot execute. */
static const int SysCallCode[] = {0xD201422B,0x60F20000,0x80010070};
/* SysCall points at that array, so calling it jumps into the array itself. */
static int (*SysCall)( int R4, int R5, int R6, int R7, int FNo ) = (void*)&SysCallCode;
/* Calls syscall number 309 (0x135) with all four arguments set to 0 and returns its result as a char*. */
char* ML_vram_adress()
{
return (char*)((*SysCall)(0, 0, 0, 0, 309));
}
So this work well
Solution: The code above is not a solution for us, because we cannot edit the C without the sources.
Another problem is that the compiler generates many different code sequences for this call, depending on the parameters (it changes depending on whether they are constants, variables or pointers).
Here is a code that can be generated
- Code: Select all
; One possible compiler-generated call site of (*SysCall)(...), before patching
mov.l @(H'114,pc),r3 ;// Loads the address where the address of the array is stored, here 0x08100014
mov.l @(H'10c,pc),r2 ;// Loads the syscall number, here 0x135
mov.l @r3,r0 ;// Loads the address of the array, here 0x08100008
jsr @r0 ;// Jumps to the array
mov.l r2,@-r15 ;// Delay slot, executed just before the jump : pushes the syscall number on the stack
So to solve the problem, I put this code at the end of the file
- Code: Select all
_SyscallFix
mov.l #h'80010070,r2 ;//the syscall table (where we jump to execute a syscall)
jmp @r2 ;//Jump to the syscall table
mov.l @r15,r0 ;//Just before to jump, put the value in the stack to the register r0 (the value in the stack is the syscall number)
And with our original code, i edit it a little :
- Code: Select all
; The same call site after patching : it now calls _SyscallFix instead of jumping into the array
mov.l @(H'114,pc),r3 ;// The literal read here is changed to the address of the added _SyscallFix function (at the end of the file)
mov.l @(H'10c,pc),r2 ;// Unchanged : loads the syscall number
mov.l r3,r0 ;// Changed : copies the address loaded by the first line into r0 instead of dereferencing it. NOTE(review): the SH register-to-register move is normally written "mov r3,r0" - confirm the assembler accepts the .l suffix
jsr @r0 ;// Jumps to the added code
mov.l r2,@-r15 ;// Unchanged delay slot : pushes the syscall number on the stack
And it work well with this solution.
The hardest part is finding the code I showed first, because it changes every time; however, every line of the code I gave is always present (sometimes separated by other instructions, but they are always there).
It's hard for me to explain, so if you don't understand something, just ask.
So everything I wanted to do is finished, but sadly not all add-ins are compatible yet; there are other bugs. I think a little more than half of the incompatible add-ins are now compatible.
The hardest part with the other problems is identifying them, and that is not easy when you don't have an SH4 emulator to reproduce the problem and see what happens.
I am keeping the SH4 calculator until the end of the week, so I will try to fix some other problems this week, and after that I'm moving on to other projects. In the meantime, please tell me about every add-in that is not compatible, and I will try to find problems to fix. Here is the start of my list:
- Dead Labs - come back to the main menu after the introduction
- Doodle jump - Reboot - maybe cause of the grayscale
- MarioLandCE - White screen after introduction (where grayscale begin)
SH4 compatibility tool v1.01
But be careful! This tool may cause damage to your calculator! Use it at your own risk. Planet-Casio, Casiopeia and I will not be responsible for any damage.
As an example, I suggest you discover a legend on Casio calculators:
Wolfenstein 3D - SH4 version
Thanks to:
- Casimo for his SH4 compatibilty fix in C
- Simon Lothar and Andreas Bertheussen for their documentation and syscalls
- Kristaba for this topic
- Alphacreator and Maliafo for testing my first SH4 compatibility test
- Ayfer-Marie for the lend of his SH4 calc
Topic français