////////////////////////////////////////////////////////////////////// // // // cpuARMCore.pas: ARM instruction set decoder and executer // // Decodes and executes ARM instructions // // // // The contents of this file are subject to the Bottled Light // // Public License Version 1.0 (the "License"); you may not use this // // file except in compliance with the License. You may obtain a // // copy of the License at http://www.bottledlight.com/BLPL/ // // // // Software distributed under the License is distributed on an // // "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or // // implied. See the License for the specific language governing // // rights and limitations under the License. // // // // The Original Code is the Mappy VM Core, released April 1st, 2003 // // The Initial Developer of the Original Code is Bottled Light, // // Inc. Portions created by Bottled Light, Inc. are Copyright // // (C) 2001 - 2003 Bottled Light, Inc. All Rights Reserved. // // // // Author(s): // // Michael Noland (joat), michael@bottledlight.com // // // // Changelog: // // 1.0: First public release (April 1st, 2003) // // // // Notes: // // There are still a few bugs either here or in the Thumb core, // // as evidenced by the glitches in the BIOS display sequence. // // It *could* be a result of a bug in the graphics code, but it // // is pretty unlikely. 
//
//                                                                  //
//////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////
unit cpuARMCore; /////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////
interface ////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

// EmulateArm() emulates ARM opcodes until one of the following
// conditions is met:
//   an IRQ is pending and IRQs are not disabled
//   the cycle quota is exhausted
//   the CPU switches into thumb mode
//   a breakpoint is encountered
procedure EmulateArm;

//////////////////////////////////////////////////////////////////////
implementation ///////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

uses
  SysUtils, nexus, AddressSpace, cpuMemory, cpuMisc, cpuGraphics,
  cpuPeripherals;

//////////////////////////////////////////////////////////////////////
// Branch and Exchange (BX)
// Reads the target from the register named in bits 3..0 of the
// current opcode, switches between ARM and Thumb decoding according
// to bit 0 of the target, and refills the pipeline.

procedure BranchAndExchange;
var
  Rn: uint32;
begin
  // This instruction performs a branch by copying the contents of a
  // register Rn into the PC. The branch causes a pipeline flush and
  // refill from the address specified by Rn, and Rn[0] determines
  // whether the instruction stream will be decoded as ARM or Thumb.
  Rn := regs[cpuCurrentOpcode and $F];
  regs[R15] := Rn and not 1;
  thumbMode := Rn and 1 <> 0;
  if thumbMode then FlushPipeThumb else FlushPipeARM;
end;

//////////////////////////////////////////////////////////////////////
// Branch and Branch with Link (B, BL)
// Adds the sign extended 24 bit word offset to R15 and refills the
// pipeline; BL additionally stores the return address in R14.

procedure Branch;
const
  LINK_BIT = 1 shl 24;
var
  dest: uint32;
begin
  // Branch with Link (BL) writes the address of the instruction
  // following the BL instruction into the LR of the current mode.
  // Note that the CPSR is not saved and R14[1:0] are always cleared.
  if cpuCurrentOpcode and LINK_BIT <> 0 then
    regs[R14] := (regs[R15] - 4) and not 3;

  // Branch instructions contain a 2's complement 24 bit offset. This
  // is shifted left two bits, sign extended to 32 bits, and added to
  // the PC. Thus, the instruction can specify a branch of +/- 32 MB.
  dest := (cpuCurrentOpcode and $00FFFFFF) shl 2;
  if dest and (1 shl 25) <> 0 then dest := $FC000000 or dest;
  regs[R15] := regs[R15] + dest;
  FlushPipeARM;
end;

//////////////////////////////////////////////////////////////////////
// Data Processing (ALU operations) and PSR Transfer (MRS, MSR)
//   rsShift:  true when the shift amount came from a register; R15 is
//             then temporarily advanced by 4 while the instruction
//             executes (see below)
//   operand2: the already shifted/rotated second operand; the shifter
//             carry out is taken from the global barrelCarry

procedure DataProcessing(rsShift: boolean; operand2: uint32);
const
  SET_FLAGS_BIT = 1 shl 20; // S bit
var
  Rn, Rd, Rm: byte;
  operand1, test: uint32;
  neg1, neg2, negr: boolean;
begin
  // Data Processing
  // The instruction produces a result by performing a ALU operation
  // on one or two operands. The first operand is always a register
  // (Rn). The second operand may be a shifted register (Rm) or a
  // rotated 8 bit immediate value (Imm) according to the value of
  // the I bit. The CPSR condition codes can be optionally updated
  // as a result of this instruction, depending on the S bit.
  //
  // Due to instruction prefetching, the PC will be 8 bytes ahead if
  // the shift amount is specified in the instruction. If a register
  // is used to specify the shift, the PC will be 12 bytes ahead.
  if rsShift then Inc(regs[R15], 4);

  // Parse the operands
  Rn := (cpuCurrentOpcode shr 16) and $F;
  Rd := (cpuCurrentOpcode shr 12) and $F;
  operand1 := regs[Rn];

  // Do the actual processing
  if cpuCurrentOpcode and SET_FLAGS_BIT <> 0 then begin
    // CPSR flags
    // The logical operations (AND, EOR, TST, TEQ, ORR, MOV, BIC, MVN)
    // perform the logical action on all corresponding bits of the
    // operand or operands to produce the result. If the S bit is set
    // the V flag in the CPSR will be unaffected and the C flag will
    // be set to the carry out from the barrel shifter (or preserved
    // if the shift operation is LSL #0).
    //
    // The arithmetic operations (SUB, RSB, ADD, ADC, SBC, RSC, CMP,
    // CMN) treat each operand as a 32 bit integer. If the S bit is
    // set, the V flag in the CPSR will be set if an overflow occurs
    // into bit 31 of the result. The C flag will be set to the carry
    // out of bit 31 of the ALU.
    //
    // Carry and overflow are derived from the sign bits of the two
    // operands (neg1, neg2) and of the result (negr).
    case ((cpuCurrentOpcode shr 21) and $F) of
      AND_OPCODE: begin
        test := operand1 and operand2;
        carry := barrelCarry;
        regs[Rd] := test;
      end;

      EOR_OPCODE: begin
        test := operand1 xor operand2;
        carry := barrelCarry;
        regs[Rd] := test;
      end;

      SUB_OPCODE: begin
        test := operand1 - operand2;
        neg1 := operand1 shr 31 <> 0;
        neg2 := operand2 shr 31 <> 0;
        negr := test shr 31 <> 0;
        overflow := (neg1 and not neg2 and not negr) or (not neg1 and neg2 and negr);
        carry := (neg1 and not neg2) or (neg1 and not negr) or (not neg2 and not negr);
        regs[Rd] := test;
      end;

      RSB_OPCODE: begin
        test := operand2 - operand1;
        neg1 := operand2 shr 31 <> 0;
        neg2 := operand1 shr 31 <> 0;
        negr := test shr 31 <> 0;
        overflow := (neg1 and not (neg2 or negr)) or ((neg2 and negr) and not neg1);
        carry := (neg1 and not neg2) or (neg1 and not negr) or (not neg2 and not negr);
        regs[Rd] := test;
      end;

      ADD_OPCODE: begin
        test := operand1 + operand2;
        neg1 := operand1 shr 31 <> 0;
        neg2 := operand2 shr 31 <> 0;
        negr := test shr 31 <> 0;
        overflow := (neg1 = neg2) and (neg1 <> negr);
        carry := (neg1 and neg2) or ((neg1 or neg2) and not negr);
        regs[Rd] := test;
      end;

      ADC_OPCODE: begin
        test := operand1 + operand2;
        if carry then Inc(test);
        neg1 := operand1 shr 31 <> 0;
        neg2 := operand2 shr 31 <> 0;
        negr := test shr 31 <> 0;
        overflow := (neg1 = neg2) and (neg1 <> negr);
        carry := (neg1 and neg2) or ((neg1 or neg2) and not negr);
        regs[Rd] := test;
      end;

      SBC_OPCODE: begin
        test := operand1 - operand2;
        if not carry then Dec(test);
        neg1 := operand1 shr 31 <> 0;
        neg2 := operand2 shr 31 <> 0;
        negr := test shr 31 <> 0;
        overflow := (neg1 and not (neg2 or negr)) or ((neg2 and negr) and not neg1);
        carry := (neg1 and not neg2) or (neg1 and not negr) or (not neg2 and not negr);
        regs[Rd] := test;
      end;

      RSC_OPCODE: begin
        test := operand2 - operand1;
        if not carry then Dec(test);
        neg1 := operand2 shr 31 <> 0;
        neg2 := operand1 shr 31 <> 0;
        negr := test shr 31 <> 0;
        overflow := (neg1 and not (neg2 or negr)) or ((neg2 and negr) and not neg1);
        carry := (neg1 and not neg2) or (neg1 and not negr) or (not neg2 and not negr);
        regs[Rd] := test;
      end;

      TST_OPCODE: begin
        test := operand1 and operand2;
        carry := barrelCarry;
      end;

      TEQ_OPCODE: begin
        test := operand1 xor operand2;
        carry := barrelCarry;
      end;

      CMP_OPCODE: begin
        test := operand1 - operand2;
        neg1 := operand1 shr 31 <> 0;
        neg2 := operand2 shr 31 <> 0;
        negr := test shr 31 <> 0;
        carry := (neg1 and not neg2) or (neg1 and not negr) or (not neg2 and not negr);
        overflow := (neg1 and not neg2 and not negr) or (not neg1 and neg2 and negr);
      end;

      CMN_OPCODE: begin
        test := operand1 + operand2;
        neg1 := operand1 shr 31 <> 0;
        neg2 := operand2 shr 31 <> 0;
        negr := test shr 31 <> 0;
        overflow := (neg1 = neg2) and (neg1 <> negr);
        carry := (neg1 and neg2) or ((neg1 or neg2) and not negr);
      end;

      ORR_OPCODE: begin
        test := operand1 or operand2;
        carry := barrelCarry;
        regs[Rd] := test;
      end;

      MOV_OPCODE: begin
        test := operand2;
        carry := barrelCarry;
        regs[Rd] := test;
      end;

      BIC_OPCODE: begin
        test := operand1 and not operand2;
        carry := barrelCarry;
        regs[Rd] := test;
      end;

      MVN_OPCODE: begin
        test := not operand2;
        carry := barrelCarry;
        regs[Rd] := test;
      end;
    else
      test := 0; // shut up delphi!
    end;

    // The Z flag will be set if the result is zero, and the N flag
    // will be set to the value of bit 31 of the result.
    zero := test = 0;
    negative := test shr 31 <> 0;

    // When Rd is R15 and the S flag is set, the result of the
    // operation is placed in R15 and the SPSR corresponding to the
    // current mode is moved to the CPSR. This allows state changes
    // which atomically restore both PC and CPSR. This form of
    // instruction should not be used in User mode.
    if Rd = R15 then begin
      rsShift := false; // make sure we don't do the PC fixup hack
      // SPSR is used as an index into regs; the restore is skipped
      // when it is 0
      if SPSR <> 0 then cpuWriteCPSR(regs[SPSR]);
      if thumbMode then FlushPipeThumb else FlushPipeARM;
    end;
  end else begin
    // S bit is clear, ignore changes to the flags
    case ((cpuCurrentOpcode shr 21) and $F) of
      AND_OPCODE: begin
        regs[Rd] := operand1 and operand2;

        // As if by magik
        if (Rd = 0) and (Rn = 0) and (cpuCurrentOpcode and $FFF = 0) and (operand1 = $c0ded00d) then DebugTrap;
      end;

      EOR_OPCODE: regs[Rd] := operand1 xor operand2;
      SUB_OPCODE: regs[Rd] := operand1 - operand2;
      RSB_OPCODE: regs[Rd] := operand2 - operand1;
      ADD_OPCODE: regs[Rd] := operand1 + operand2;

      ADC_OPCODE: begin
        test := operand1 + operand2;
        if carry then Inc(test);
        regs[Rd] := test;
      end;

      SBC_OPCODE: begin
        test := operand1 - operand2;
        if not carry then Dec(test);
        regs[Rd] := test;
      end;

      RSC_OPCODE: begin
        test := operand2 - operand1;
        if not carry then Dec(test);
        regs[Rd] := test;
      end;

      // PSR Transfer (MRS, MSR)
      // The instruction is only executed if the condition is true.
      // The MRS and MSR instructions are formed from a subset of the Data Processing
      // operations and are implemented using the TEQ, TST, CMN and CMP instructions
      // without the S flag set.
      //
      // These instructions allow access to the CPSR and SPSR registers. The MRS
      // instruction allows the contents of the CPSR or SPSR_ to be moved to a
      // general register. The MSR instruction allows the contents of a general register to be
      // moved to the CPSR or SPSR_ register.
      //
      // The MSR instruction also allows an immediate value or register contents to be
      // transferred to the condition code flags (N,Z,C and V) of CPSR or SPSR_
      // without affecting the control bits. In this case, the top four bits of the specified register
      // contents or 32 bit immediate value are written to the top four bits of the relevant PSR.
      //
      // Operand restrictions
      //  o In User mode, the control bits of the CPSR are protected from change, so only
      //    the condition code flags of the CPSR can be changed. In other (privileged)
      //    modes the entire CPSR can be changed.
      //
      // Note that the software must never change the state of the T bit in the CPSR.
      // If this happens, the processor will enter an unpredictable state. fixme: not caught!
      // You must not specify R15 as the source or destination register.
      // Do not attempt to access an SPSR in User mode, since no such register exists.

      TST_OPCODE: begin
        // MRS Rd, CPSR (transfer PSR contents to a register)
        // cond 00 0 10P0 0 1111 [Rd] 0000 0000 0000
        if cpuCurrentOpcode and $F0FFF = $F0000 then
          regs[Rd] := cpuReadCPSR
        else
          UndefinedState('MRS Rd, CPSR found that does not decode properly');
      end;

      TEQ_OPCODE: begin
        Rm := cpuCurrentOpcode and $F;
        if cpuCurrentOpcode and $FFFF0 = $9F000 then begin
          // MSR CPSR, Rm             cond 00 0 10P1 0 1001 1111 0000 0000 [Rm]
          test := regs[Rm];
          if cpuMode = MODE_USER then begin
            // You can only set the flag bits in user mode
            negative := test and SR_N <> 0;
            zero := test and SR_Z <> 0;
            carry := test and SR_C <> 0;
            overflow := test and SR_V <> 0;
          end else
            cpuWriteCPSR(test);
        end else if cpuCurrentOpcode and $FF000 = $8F000 then begin
          // MSR CPSR, Rm             cond 00 0 10P1 010001111 0000 0000 [Rm]
          // MSR CPSR, operand2       cond 00 1 10P1 010001111 Rotate Immedia
          //                                 0 1001 010001111 0000 0000 0100
          if cpuCurrentOpcode and (1 shl 25) = 0 then begin
            if Rm = R15 then UndefinedState('MSR CPSR_flg, r15');
            if cpuCurrentOpcode and $FF0 <> 0 then UndefinedState('MSR CPSR_flg, Rm using the barrel shifter');
          end;

          negative := operand2 and SR_N <> 0;
          zero := operand2 and SR_Z <> 0;
          carry := operand2 and SR_C <> 0;
          overflow := operand2 and SR_V <> 0;
        end else begin
          UndefinedState('MSR (TEQ-type) found that does not decode properly');
        end;

        // keep pc from changing
        Rd := 0;
      end;

      CMP_OPCODE: begin
        // MRS Rd, SPSR (transfer SPSR contents to a register)
        // cond 00 0 10P0 0 1111 [Rd] 0000 0000 0000
        if cpuCurrentOpcode and $F0FFF = $F0000 then begin
          if SPSR <> 0 then regs[Rd] := regs[SPSR];
        end else
          UndefinedState('MRS Rd, SPSR found that does not decode properly');
      end;

      CMN_OPCODE: begin
        Rm := cpuCurrentOpcode and $F;
        if cpuCurrentOpcode and $FFFF0 = $9F000 then begin
          // MSR SPSR, Rm             cond 00 0 10P1 0 1001 1111 0000 0000 [Rm], P=1
          if Rm = R15 then UndefinedState('MSR SPSR, r15');
          if SPSR <> 0 then regs[SPSR] := regs[Rm];
        end else if cpuCurrentOpcode and $FF000 = $8F000 then begin
          // MSR SPSR, Rm             cond 00 0 10P1 0 1000 1111 0000 0000 [Rm]
          // MSR SPSR, operand2       cond 00 1 10P1 0 1000 1111 Rotate Immedia
          if SPSR <> 0 then regs[SPSR] := (operand2 and $F0000000) or (regs[SPSR] and $0FFFFFFF);
        end else
          UndefinedState('MSR (CMN-type) found that does not decode properly');

        // keep pc from changing
        Rd := 0;
      end;

      // Back to standard ALU ops
      ORR_OPCODE: regs[Rd] := operand1 or operand2;
      MOV_OPCODE: regs[Rd] := operand2;
      BIC_OPCODE: regs[Rd] := operand1 and not operand2;
      MVN_OPCODE: regs[Rd] := not operand2;
    end;

    if Rd = R15 then begin
      // R15 now holds a freshly written branch target, so the PC
      // fixup below must not be applied to it (this mirrors the
      // handling in the S bit path above).
      rsShift := false;
      FlushPipeARM;
    end;
  end;

  // Since I do the pipelining a little differently than the ARM7tdmi
  // does in hardware, I update R15 outside of this func regardless
  // of the operand 2 format, so I have to fix R15 back up if its
  // further along than normal (register specified shift)
  if rsShift then Dec(regs[R15], 4);
end;

//////////////////////////////////////////////////////////////////////
// Multiply and Multiply-Accumulate (MUL, MLA)
// Writes the low 32 bits of Rm * Rs (plus Rn for MLA) to Rd, charges
// the internal cycles to the quota and optionally updates N and Z.

procedure Multiply;
const
  SET_FLAGS_BIT = 1 shl 20;
  ACCUMULATE_BIT = 1 shl 21;
var
  Rd, Rm, Rs: byte;
  m: uint32;
begin
  // Multiply and Multiply-Accumulate (MUL, MLA)
  // The multiply and multiply-accumulate instructions use an 8 bit
  // Booth's algorithm to perform integer multiplication.

  // The destination register Rd must not be the same as the operand
  // register Rm, and R15 should not be used at all.
  Rm := cpuCurrentOpcode and $F;
  Rs := (cpuCurrentOpcode shr 8) and $F;
  Rd := (cpuCurrentOpcode shr 16) and $F;

  // MUL uses m internal cycles, and MLA m + 1 where m is the number
  // of multiplier array cycles required to complete the multiply,
  // which is controlled by the value of the multiplier operand Rs
  //   m is 1 if bits [31:8] of Rs are all 0 or all 1
  //   m is 2 if bits [31:16] of Rs are all 0 or all 1
  //   m is 3 if bits [31:24] of Rs are all 0 or all 1
  //   m is 4 in all other cases.
  m := regs[Rs];
  // Inverting a negative value turns the "all 1" test into the same
  // "all 0" test used for positive values
  if m shr 31 <> 0 then m := not m;
  if m and $FFFFFF00 = 0 then m := 1
  else if m and $FFFF0000 = 0 then m := 2
  else if m and $FF000000 = 0 then m := 3
  else m := 4;

  if cpuCurrentOpcode and ACCUMULATE_BIT = 0 then begin
    // The multiply form of the instruction gives Rd := Rm * Rs
    Dec(quota, m * cycI);
    regs[Rd] := regs[Rm] * regs[Rs];
  end else begin
    // The multiply-accumulate form gives Rd := Rm * Rs + Rn
    Dec(quota, (m+1) * cycI);
    regs[Rd] := regs[Rm] * regs[Rs] + regs[(cpuCurrentOpcode shr 12) and $F];
  end;

  // If the S bit is set, the N and Z flags are set according to the
  // result, and the C flag is set to a meaningless value.
  if cpuCurrentOpcode and SET_FLAGS_BIT <> 0 then begin
    negative := regs[Rd] shr 31 <> 0;
    zero := regs[Rd] = 0;
  end;
end;

//////////////////////////////////////////////////////////////////////
// Multiply Long and Multiply-Accumulate Long (MULL, MLAL)
// Herein lies the only ASM used in the MappyVM core, and out of
// laziness rather than need for speed. IA32 already provides us with
// a nice 32x32->64 multiply, why reinvent the wheel.

procedure LongMultiply;
const
  DO_ACCUMULATE = 1 shl 21;
  IS_SIGNED = 1 shl 22;
  SET_FLAGS = 1 shl 20;
var
  signed: boolean;
  Rm, Rs, RdHi, RdLo, m: uint32;
begin
  // Multiply Long and Multiply-Accumulate Long (MULL, MLAL)
  // Multiply long instructions perform integer multiplication on two
  // 32 bit operands and produce 64 bit results. Signed and unsigned
  // multiplication with optional accumulate total to 4 variations.

  // Note that Rm and Rs hold the register VALUES here, not indices
  signed := cpuCurrentOpcode and IS_SIGNED <> 0;
  Rm := regs[cpuCurrentOpcode and $F];
  Rs := regs[(cpuCurrentOpcode shr 8) and $F];
  m := Rs;

  // MULL uses m+1 internal cycles and MLAL m+2, where m is the
  // number of 8 bit multiplier array cycles required to complete
  // the multiply, which is controlled by the value of Rs.

  // Possible values of m for signed instructions SMULL, SMLAL are:
  //   m is 1 if bits [31:8] of Rs are all 0 or all 1
  //   m is 2 if bits [31:16] of Rs are all 0 or all 1
  //   m is 3 if bits [31:24] of Rs are all 0 or all 1
  //   m is 4 in all other cases.
  if signed and (Rs shr 31 <> 0) then m := not m;

  // For unsigned instructions UMULL, UMLAL, m is the same as for the
  // signed instructions, except the regions must be all 0's
  // (the values assigned below already include the +1 for MULL)
  if m and $FFFFFF00 = 0 then m := 2
  else if m and $FFFF0000 = 0 then m := 3
  else if m and $FF000000 = 0 then m := 4
  else m := 5;

  // R15 must not be used as an operand or as a destination register.
  // RdHi, RdLo, and Rm must all specify different registers.
  if cpuCurrentOpcode and DO_ACCUMULATE = 0 then begin
    // Convert the m count into actual cycles
    Dec(quota, m*cycI);

    // The multiply forms (UMULL and SMULL) take two 32 bit numbers
    // and multiply them to produce a 64 bit result of the form:
    // (RdHi,RdLo) := Rm * Rs.
    if signed then
      asm
        mov eax,[Rm]
        imul dword ptr [Rs]
        mov [RdLo],eax
        mov [RdHi],edx
      end
    else
      asm
        mov eax,[Rm]
        mul dword ptr [Rs]
        mov [RdLo],eax
        mov [RdHi],edx
      end;
  end else begin
    // Convert the m count into actual cycles
    Dec(quota, (m+1)*cycI);

    // The multiply-accumulate forms (UMLAL and SMLAL) take two 32 bit
    // numbers, multiply them together, and add the 64 bit value that
    // was initially in (RdHi,RdLo), as shown below:
    // (RdHi,RdLo) := Rm * Rs + (RdHi,RdLo)
    RdHi := regs[(cpuCurrentOpcode shr 16) and $F];
    RdLo := regs[(cpuCurrentOpcode shr 12) and $F];
    if signed then
      asm
        mov eax,[Rm]
        imul dword ptr [Rs]
        add [RdLo],eax
        adc [RdHi],edx
      end
    else
      asm
        mov eax,[Rm]
        mul dword ptr [Rs]
        add [RdLo],eax
        adc [RdHi],edx
      end;
  end;

  regs[(cpuCurrentOpcode shr 16) and $F] := RdHi;
  regs[(cpuCurrentOpcode shr 12) and $F] := RdLo;

  // If the S bit is set, the N and Z flags are set according to the
  // result (N is equal to bit 63 of the result, and Z is set if and
  // only if all 64 bits are 0), and the C and V flags are set to
  // meaningless values.
  if cpuCurrentOpcode and SET_FLAGS <> 0 then begin
    if (RdLo = 0) and (RdHi = 0) then begin
      zero := true;
      negative := false;
    end else begin
      zero := false;
      negative := RdHi shr 31 <> 0;
    end;
  end;
end;

//////////////////////////////////////////////////////////////////////
// Single Data Transfer (LDR, STR)
// Loads or stores one word or byte at Rn +/- offset, with optional
// pre/post indexing and base write-back.

procedure SingleDataTransfer;
const
  LOAD_BIT = 1 shl 20;
  WRITEBACK_BIT = 1 shl 21;
  BYTE_BIT = 1 shl 22;
  UP_BIT = 1 shl 23;
  PRE_INCREMENT_BIT = 1 shl 24;
  IMMEDIATE_BIT = 1 shl 25;
var
  Rn, Rd, Rm: byte;
  index, operand2: uint32;
  preIncrement: boolean;
begin
  // Single Data Transfer (LDR, STR)
  // The single data transfer instructions are used to load or store a
  // single byte or word. The memory address used for the transfer is
  // obtained by adding or subtracting an offset from a base register.

  // Parse the operands
  Rn := (cpuCurrentOpcode shr 16) and $F;
  Rd := (cpuCurrentOpcode shr 12) and $F;
  Rm := cpuCurrentOpcode and $F;
  index := regs[Rn];

  // The offset from the base may be either a 12 bit immediate value,
  // or a second register (possibly shifted in some way). The offset
  // can be added or subtracted from the base register Rn before or
  // after the base is used as the transfer address.
  // (for this instruction class a CLEAR bit 25 selects the immediate)
  if cpuCurrentOpcode and IMMEDIATE_BIT = 0 then
    operand2 := cpuCurrentOpcode and $FFF
  else
    operand2 := BarrelShifter(regs[Rm], (cpuCurrentOpcode shr 5) and $3, (cpuCurrentOpcode shr 7) and $1F);

  // Are we indexing forwards or backwards
  if cpuCurrentOpcode and UP_BIT = 0 then operand2 := -operand2;

  preIncrement := cpuCurrentOpcode and PRE_INCREMENT_BIT <> 0;
  if preIncrement then begin
    Inc(index, operand2);

    // The modified base value may be written back if W is 1. The W
    // bit is redundant with post-indexed addressing and set to 0, as
    // the modified base is always written back.

    // Write-back must not be specified if R15 is the base register
    if cpuCurrentOpcode and WRITEBACK_BIT <> 0 then regs[Rn] := index;
  end;

  // Check the L bit and see if it's a load or store
  if cpuCurrentOpcode and LOAD_BIT <> 0 then begin
    // Load from memory
    if cpuCurrentOpcode and BYTE_BIT = 0 then
      regs[Rd] := memLoadWord(index)
    else
      regs[Rd] := memReadByte(index);

    // LDR instructions have one trailing I cycle, unless the PC is
    // modified, in which case the I cycle is in the middle.
    Dec(quota, cycI);
    if Rd = R15 then FlushPipeARM;
  end else begin
    // When R15 is the source register, the stored value will be
    // the address of the instruction plus 12.
    Inc(regs[R15], 4);

    // Store to memory
    if cpuCurrentOpcode and BYTE_BIT = 0 then
      memWriteWord(index, regs[Rd])
    else
      memWriteByte(index, regs[Rd]);

    Dec(regs[R15], 4);
  end;

  // Check the P bit and post-increment if necessary
  // NOTE(review): for a post-indexed load with Rd = Rn this write-back
  // replaces the value that was just loaded - confirm against hardware
  if not preIncrement then regs[Rn] := index + operand2;
end;

//////////////////////////////////////////////////////////////////////
// Halfword and Signed Data Transfer (LDRH/STRH/LDRSB/LDRSH)
// Handles both the register offset and the immediate offset forms.

procedure HalfwordXfer;
const
  LOAD_BIT = 1 shl 20;
  WRITEBACK_BIT = 1 shl 21;
  IMMEDIATE_BIT = 1 shl 22;
  UP_BIT = 1 shl 23;
  PRE_INCREMENT_BIT = 1 shl 24;
var
  Rn, Rd, Rm: byte;
  index, offset: uint32;
  preIncrement: boolean;
begin
  // Halfword and Signed Data Transfer (LDRH/STRH/LDRSB/LDRSH)
  // These instructions are used to load or store halfwords of data
  // and also load sign-extended bytes or halfwords. The address used
  // in the transfer is calculated via a combination of a base
  // register and adding or subtracting an offset. The result of this
  // calculation may be written back into the base register as well.

  // R15 should not be specified as the register offset (Rm).
  // Write-back should not be specified if R15 is specified as the
  // base register (Rn) or if post-indexing is used.

  // Parse the operands
  Rm := cpuCurrentOpcode and $F;
  Rd := (cpuCurrentOpcode shr 12) and $F;
  Rn := (cpuCurrentOpcode shr 16) and $F;
  index := regs[Rn];

  // The offset from the base may be either a 8-bit unsigned binary
  // immediate value, or a second register. The 8-bit offset is
  // formed by concatenating bits 11 to 8 and bits 3 to 0 of the
  // instruction word, such that bit 11 becomes the MSB and bit 0
  // becomes the LSB. The offset may be added to (U=1) or subtracted
  // from (U=0) the base register Rn, and this may be performed either
  // before (pre-indexed, P=1) or after (post-indexed, P=0) the base
  // register is used as the transfer address.
  if cpuCurrentOpcode and IMMEDIATE_BIT <> 0 then
    // Rm holds opcode bits 3..0 here, i.e. the low nibble of the offset
    offset := (cpuCurrentOpcode shr 4) and $F0 + Rm
  else
    offset := regs[Rm];

  // Are we indexing forwards or backwards
  if cpuCurrentOpcode and UP_BIT = 0 then offset := -offset;

  // Check the P bit and pre-increment if necessary
  preIncrement := cpuCurrentOpcode and PRE_INCREMENT_BIT <> 0;
  if preIncrement then begin
    Inc(index, offset);
    if cpuCurrentOpcode and WRITEBACK_BIT <> 0 then regs[Rn] := index;
  end;

  // Halfword loads and stores on an address with A0=1 are
  // 'unpredictable', so don't do anything special ATM

  // Check the L bit and see if it's a load or store
  if cpuCurrentOpcode and LOAD_BIT <> 0 then begin
    // Load halfword, or signed byte/halfword from memory
    case ((cpuCurrentOpcode shr 5) and $3) of
      1: regs[Rd] := memReadHalfWord(index);
      2: begin
        // The LDRSB instruction loads the selected byte into the
        // destination register and sign extends it to 32 bits.
        regs[Rd] := memReadByte(index);
        if regs[Rd] shr 7 <> 0 then regs[Rd] := $FFFFFF00 or regs[Rd];
      end;
      3: begin
        // The LDRSH instruction loads the selected halfword into the
        // destination register and sign extends it to 32 bits
        regs[Rd] := memReadHalfWord(index);
        if regs[Rd] shr 15 <> 0 then regs[Rd] := $FFFF0000 or regs[Rd];
      end;
    end;

    // LDR(H,SH,SB) instructions have one trailing I cycle, unless
    // the PC is modified, in which case the I cycle is in the middle.
    Dec(quota, cycI);
    if Rd = R15 then FlushPipeARM;
  end else begin
    // Store halfword to memory
    // When R15 is the source register Rd, the address will be the
    // address of the instruction plus 12
    if Rd = R15 then
      memWriteHalfWord(index, regs[R15] + 4)
    else
      memWriteHalfWord(index, regs[Rd]);
  end;

  // In the case of post-indexed addressing, the write back bit is
  // redundant and is always set to zero, since post-indexed data
  // transfers always write back the modified base.
  // NOTE(review): for a post-indexed load with Rd = Rn this write-back
  // replaces the value that was just loaded - confirm against hardware
  if not preIncrement then regs[Rn] := index + offset;
end;

//////////////////////////////////////////////////////////////////////
// Block Data Transfer (LDM, STM)
// Transfers every register named in the 16 bit register list, lowest
// register at the lowest address. Descending transfers are converted
// into ascending ones by starting at the bottom of the block and
// toggling the pre/post bit in cpuCurrentOpcode (which this procedure
// therefore modifies).

procedure BlockDataTransfer;
const
  LOAD_BIT = 1 shl 20;
  WRITEBACK_BIT = 1 shl 21;
  PSR_FORCE_BIT = 1 shl 22;
  UP_BIT = 1 shl 23;
  PRE_INCREMENT_BIT = 1 shl 24;
var
  bits, base: byte;
  i: integer;
  address, temp, finalBase: uint32;
begin
  // Block Data Transfer (LDM, STM)
  // The instruction can transfer any registers in the current bank
  // (and also to and from the user bank in non-user modes). The
  // register list is a 16 bit field in the instruction, where each
  // bit corresponds to a register.
  //
  // Addressing modes
  // The transfer addresses are determined by the contents of the base
  // register (Rn), the pre/post bit (P) and the up/down bit (U). The
  // registers are transferred in the order lowest to highest, so R15
  // (if in the list) will always be transferred last. The lowest
  // register also gets transferred to/from the lowest memory address.
  //
  // Address alignment
  // The address should normally be a word aligned quantity and non
  // word aligned addresses do not affect the instruction. However,
  // the bottom 2 bits of the address will appear on A[1:0] and might
  // be interpreted by the memory system.
  //
  // R15 should not be used as the base register.
  base := (cpuCurrentOpcode shr 16) and $F;
  address := regs[base];

  // Count the number of bits in the list
  bits := 0;
  for i := 0 to 15 do Inc(bits, (cpuCurrentOpcode shr i) and 1);

  if cpuCurrentOpcode and LOAD_BIT <> 0 then begin
    // LDM - Load multiple registers from memory

    // Compute the start address and calculate the writeback value
    if cpuCurrentOpcode and UP_BIT = 0 then begin
      Dec(address, 4*bits);

      // Since we start from the bottom and increment instead
      // of from the top and decrementing, the order we increment
      // must change as well.
      cpuCurrentOpcode := cpuCurrentOpcode xor PRE_INCREMENT_BIT;

      // Write the modified value back if needed
      if cpuCurrentOpcode and WRITEBACK_BIT <> 0 then regs[base] := address;
    end else begin
      if cpuCurrentOpcode and WRITEBACK_BIT <> 0 then regs[base] := address + 4*bits;
    end;

    // Read any registers in the list (the write-back above happens
    // first, so a loaded base register overrides it)
    if cpuCurrentOpcode and PRE_INCREMENT_BIT <> 0 then begin
      for i := 0 to 15 do if (cpuCurrentOpcode shr i) and 1 <> 0 then begin
        Inc(address, 4);
        regs[i] := memReadWord(address);
      end;
    end else begin
      for i := 0 to 15 do if (cpuCurrentOpcode shr i) and 1 <> 0 then begin
        regs[i] := memReadWord(address);
        Inc(address, 4);
      end;
    end;

    if (cpuCurrentOpcode shr 15) and 1 <> 0 then begin
      // If a LDM has R15 in its transfer list and the S bit set, a
      // mode change will take place where SPSR_ is transferred
      // to CPSR at the same time R15 is loaded and the pipe flushed.
      if (cpuCurrentOpcode and PSR_FORCE_BIT <> 0) and (SPSR <> 0) then cpuWriteCPSR(regs[SPSR]);
      if thumbMode then FlushPipeThumb else FlushPipeARM;
    end else begin
      // User bank transfers occur when the S bit is set and R15 is
      // not in the list. The user bank registers are transferred
      // instead of the banked registers of the current mode. Base
      // write-back should not be used when this mechanism is
      // employed. For LDM, care must be taken not to read from a
      // banked register during the following cycle
      if cpuCurrentOpcode and PSR_FORCE_BIT <> 0 then
        LogWriteLn(Format('Unimplemented opcode at $%8.8x: LDM with forced user bank', [regs[R15]-8]));
    end;

    // LDR has a trailing I cycle that gets merged with the next
    // opcode fetch normally
    Dec(quota, cycI);
  end else begin
    // STM - Store multiple registers to memory

    // Whenever R15 is stored to memory the stored value is the
    // address of the STM instruction plus 12.
    Inc(regs[R15], 4);

    // Keep the unmodified base around; it may have to be stored as-is
    temp := regs[base];

    // Compute the start address and calculate the writeback value
    if cpuCurrentOpcode and UP_BIT = 0 then begin
      Dec(address, 4*bits);

      // Since we start from the bottom and increment instead
      // of from the top and decrementing, the order we increment
      // must change as well.
      cpuCurrentOpcode := cpuCurrentOpcode xor PRE_INCREMENT_BIT;

      finalBase := address;
    end else
      finalBase := address + 4*bits;

    // Write the modified value back if needed
    if cpuCurrentOpcode and WRITEBACK_BIT <> 0 then begin
      regs[base] := finalBase;

      // stm base!, {base, ...} with base as the lowest register
      // stores the original value of base, not the computed
      // final value, damn you ARM. This applies to ascending and
      // descending transfers alike, so the base is stored here by
      // hand and removed from the list before the main loop runs.
      if (cpuCurrentOpcode and (1 shl base) <> 0) and
         (cpuCurrentOpcode and not ($FFFFFFFF shl base) = 0) then begin
        cpuCurrentOpcode := cpuCurrentOpcode xor (1 shl base);
        if cpuCurrentOpcode and PRE_INCREMENT_BIT = 0 then begin
          memWriteWord(address, temp);
          Inc(address, 4);
        end else begin
          Inc(address, 4);
          memWriteWord(address, temp);
        end;
      end;
    end;

    // Write any registers in the list
    if cpuCurrentOpcode and PRE_INCREMENT_BIT <> 0 then begin
      for i := 0 to 15 do if (cpuCurrentOpcode shr i) and 1 <> 0 then begin
        Inc(address, 4);
        memWriteWord(address, regs[i]);
      end;
    end else begin
      for i := 0 to 15 do if (cpuCurrentOpcode shr i) and 1 <> 0 then begin
        memWriteWord(address, regs[i]);
        Inc(address, 4);
      end;
    end;

    // Since the encapsulating code takes care of the advancement,
    // we need to repair R15 before returning
    Dec(regs[R15], 4);

    // User bank transfers occur when the S bit is set and R15 is not
    // in the list. The user bank registers are transferred instead
    // of the banked registers of the current mode. Base write-back
    // should not be used when this mechanism is employed.
    if cpuCurrentOpcode and PSR_FORCE_BIT <> 0 then
      LogWriteLn(Format('Unimplemented opcode at $%8.8x: STM with forced user bank', [regs[R15]-8]));
  end;
end;

//////////////////////////////////////////////////////////////////////
// Single Data Swap (SWP)
// Reads the byte or word at [Rn] into Rd and writes Rm to [Rn].

procedure SingleDataSwap;
const
  AS_BYTE_BIT = 1 shl 22;
var
  Rn, Rd, Rm: byte;
  address, source: uint32;
begin
  // Single Data Swap (SWP)
  // The data swap instruction is used to swap a byte or word between
  // a register and external memory. This instruction is implemented
  // as a memory read followed by a memory write which are atomic.

  // Do not use R15 as an operand (Rd, Rn or Rm) in a SWP instruction.

  // Parse the operands
  Rn := (cpuCurrentOpcode shr 16) and $F;
  Rd := (cpuCurrentOpcode shr 12) and $F;
  Rm := cpuCurrentOpcode and $F;
  address := regs[Rn];

  // The swap address is determined by the base register (Rn). The
  // processor first reads the contents of the swap address, then it
  // writes the contents of the source register (Rm) to the swap
  // address, and stores the old memory contents in the destination
  // register (Rd). The same register may be specified as both the
  // source and destination.
  // (the source is latched first so that Rd = Rm works)
  source := regs[Rm];
  if cpuCurrentOpcode and AS_BYTE_BIT <> 0 then begin
    regs[Rd] := memReadByte(address);
    memWriteByte(address, source);
  end else begin
    regs[Rd] := memLoadWord(address);
    memWriteWord(address, source);
  end;

  // Swap instructions have a trailing I cycle
  Dec(quota, cycI);
end;

//////////////////////////////////////////////////////////////////////
// Main ARM interpreter loop: fetches through the two entry pipeline
// held in regs[PIPELINE_0]/regs[PIPELINE_1], evaluates the condition
// field and dispatches to the handler of the matching instruction
// class.

procedure EmulateArm;
var
  shift: uint32;
  Rs, Rm: byte;
  operand2: uint32;
  rmValue: uint32;
  cond: boolean;
begin
  repeat
    if irqPending and not irqDisabled then Exit;

{$IFDEF SIGNATURES}
    // Add an ARM signature to the address of the current opcode
    operand2 := regs[R15]-4;
    if operand2 >= $08000000 then begin
      operand2 := (operand2 shr 1) and $FFFFFF;
      sigs[operand2] := sigs[operand2] or ARM_READ;
    end;
{$ENDIF}

    // Read the current instruction
    // leave PC at nextAddress + L
    cpuCurrentOpcode := regs[PIPELINE_0];
    regs[PIPELINE_0] := regs[PIPELINE_1]; // at currentAddress + L
    Inc(regs[R15], 4);
    regs[PIPELINE_1] := memReadWordUnc(regs[R15]); // at currentAddress + 2L

    // Process the rest of the instruction if the condition is true
    rs := cpuCurrentOpcode shr 28;
    {$I incTestCond.pas}
    if cond then begin
      if cpuCurrentOpcode and BX_MASK = BX_SIG then begin
        BranchAndExchange;
      end else if cpuCurrentOpcode and BRANCH_MASK = BRANCH_SIG then begin
        Branch;
      end else if cpuCurrentOpcode and MULTIPLY_MASK = MULTIPLY_SIG then begin
        Multiply;
      end else if cpuCurrentOpcode and MULTIPLY_LONG_MASK = MULTIPLY_LONG_SIG then begin
        LongMultiply;
      end else if cpuCurrentOpcode and ALU_SHIFT_BY_IMM_MASK = ALU_SHIFT_BY_IMM_SIG then begin
        // Instruction specified shift amount: the shift amount is
        // specified in the instruction as a 5 bit field
        operand2 := BarrelShifter(regs[cpuCurrentOpcode and $F], (cpuCurrentOpcode shr 5) and $3, (cpuCurrentOpcode shr 7) and $1F);

        // Perform the operation
        DataProcessing(false, operand2);
      end else if cpuCurrentOpcode and ALU_SHIFT_BY_REG_MASK = ALU_SHIFT_BY_REG_SIG then begin
        // Register specified shift amount
        Rm := cpuCurrentOpcode and $F;
        Rs := (cpuCurrentOpcode shr 8) and $F;

        // With a register specified shift the PC reads 12 bytes
        // ahead. DataProcessing applies that adjustment for Rn, but
        // Rm is read out here, so it has to be adjusted by hand.
        rmValue := regs[Rm];
        if Rm = R15 then Inc(rmValue, 4);

        // Only the least significant byte of the contents of Rs is
        // used to determine the shift amount. Rs can be any general
        // register other than R15.
        shift := regs[Rs] and $FF;
        if shift = 0 then begin
          // If this byte is zero, the unchanged contents of Rm will
          // be used as the second operand, and the old value of the
          // CPSR C flag will be passed on as the shifter carry out.
          operand2 := rmValue;
          barrelCarry := carry;
        end else if shift > 31 then begin
          // If the value in the byte is 32 or more, the result will
          // be a logical extension of the shift described above:
          //   LSL by 32: Result zero, carry out equal to Rm[0]
          //   LSL by more than 32: Result zero, carry out zero
          //   LSR by 32: Result zero, carry out equal to Rm[31]
          //   LSR by more than 32: Result zero, carry out zero
          //   ASR by 32 or more: Result filled with and carry out
          //     equal to bit 31 of Rm.
          //   ROR by 32: Result equal to Rm, carry out is Rm[31]
          //   ROR by n where n is greater than 32: Gives the same
          //     result and carry out as ROR by n-32
          operand2 := 0;
          case ((cpuCurrentOpcode shr 5) and $3) of
            LSL: barrelCarry := (shift = 32) and (rmValue and (1 shl 0) <> 0);
            LSR: barrelCarry := (shift = 32) and (rmValue shr 31 <> 0);
            ASR: operand2 := BarrelShifter(rmValue, (cpuCurrentOpcode shr 5) and $3, shift);
            ROR: begin
              if shift and 31 = 0 then begin
                operand2 := rmValue;
                barrelCarry := rmValue shr 31 <> 0;
              end else
                operand2 := BarrelShifter(rmValue, (cpuCurrentOpcode shr 5) and $3, shift and 31);
            end;
          end;
        end else
          // If the byte has a value between 1 and 31, the shifted
          // result will exactly match that of a specified shift with
          // the same value and shift operation.
          operand2 := BarrelShifter(rmValue, (cpuCurrentOpcode shr 5) and $3, shift);

        // Perform the data processing operation
        DataProcessing(true, operand2);

        // Register specified shift data processing opcodes have a
        // trailing I cycle
        Dec(quota, cycI);
      end else if cpuCurrentOpcode and ALU_IMM_ROT_MASK = ALU_IMM_ROT_SIG then begin
        // The immediate operand rotate field is a 4 bit unsigned
        // integer which specifies a shift operation on the 8 bit
        // immediate value. This value is zero extended to 32 bits,
        // and then subject to a rotate right by twice the value in
        // the rotate field.
        shift := ((cpuCurrentOpcode shr 8) and $F) shl 1;
        operand2 := cpuCurrentOpcode and $FF;
        if shift = 0 then begin
          // No rotation: the immediate is used as is and the shifter
          // carry out is the current C flag, so that flag setting
          // logical operations leave C untouched.
          barrelCarry := carry;
        end else begin
          // The carry out is bit 31 of the rotated value, which is
          // bit (shift-1) of the immediate before the rotation.
          barrelCarry := operand2 and (1 shl (shift-1)) <> 0;
          operand2 := (operand2 shr shift) or (operand2 shl (32-shift));
        end;

        // Perform the data processing operation
        DataProcessing(false, operand2);
      end else if cpuCurrentOpcode and SINGLE_DATA_SWAP_MASK = SINGLE_DATA_SWAP_SIG then
        SingleDataSwap
      else if cpuCurrentOpcode and HW_XFER_REGOFS_MASK = HW_XFER_REGOFS_SIG then
        HalfwordXfer
      else if cpuCurrentOpcode and HW_XFER_IMMOFS_MASK = HW_XFER_IMMOFS_SIG then
        HalfwordXfer
      else if cpuCurrentOpcode and UNDEFINED_MASK = UNDEFINED_SIG then
        UndefinedOpcode
      else if cpuCurrentOpcode and SINGLE_DATA_XFER_MASK = SINGLE_DATA_XFER_SIG then
        SingleDataTransfer
      else if cpuCurrentOpcode and BLOCK_DATA_XFER_MASK = BLOCK_DATA_XFER_SIG then
        BlockDataTransfer
      else if cpuCurrentOpcode and SWI_MASK = SWI_SIG then
        SoftwareInterrupt
      else if cpuCurrentOpcode and COPRO_DATA_XFER_MASK = COPRO_DATA_XFER_SIG then
        UndefinedOpcode
      else if cpuCurrentOpcode and COPRO_DATA_OP_MASK = COPRO_DATA_OP_SIG then
        UndefinedOpcode
      else if cpuCurrentOpcode and COPRO_REG_XFER_MASK = COPRO_REG_XFER_SIG then
        UndefinedOpcode
      else
        UndefinedState('ARM Decoder: Does not match any known instruction class');
    end;

    if memStopAtAddy(regs[R15] - 4) then begin
      hitBreakpoint := true;
      Exit;
    end;
  until (quota <= 0) or thumbMode;

  if thumbMode and not haveFlippedThumb then
hitBreakpoint := memStopAtAddy(regs[R15] - 2); end; ////////////////////////////////////////////////////////////////////// end. //////////////////////////////////////////////////////////////////////