diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..ac8248c --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "external/gtest"] + path = external/gtest + url = https://github.com/google/googletest.git diff --git a/README.md b/README.md new file mode 100644 index 0000000..5cabedf --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# CPU Security Emulator [x86] +This is mostly a learning project for me. I am learning about the internals of a CPU and how it encodes/decodes instructions to produce the intended results. + +As of now, there are limited features, and supported instructions are limited. + +See; for an exhaustive list of supported instructions and specific opcodes. + +## Planned emulator features +- [ ] SIB Byte Support for existing and future instructions. +- [ ] Execution Lock and Permission system for RAM. +- [ ] Branch Prediction and Speculative Execution //Long-term plan +- [ ] SIMD flattening (optional) and AV redirection (optional). +- [ ] Buffer overflow prediction, detection, and reporting. + +## Planned architectural features +- [ ] Better logging system +- [ ] Comprehensive unit tests for each instruction. 
+ +## Planned instructions +TBD + +## Requirements +- gcc +Rest; TBD diff --git a/external/gtest b/external/gtest new file mode 160000 index 0000000..872d386 --- /dev/null +++ b/external/gtest @@ -0,0 +1 @@ +Subproject commit 872d386a87799a9f5ae1a802759b98ac943d2161 diff --git a/src/Bus.cpp b/src/Bus.cpp index 60e9780..46601b1 100644 --- a/src/Bus.cpp +++ b/src/Bus.cpp @@ -5,6 +5,6 @@ Bus::Bus() { for(int i = 0; i < 992 * 1024; i++) { - m_RAM[i] = 0; + m_RAM[i] = 0x90; } } diff --git a/src/Bus.h b/src/Bus.h index 7300141..d6912b9 100644 --- a/src/Bus.h +++ b/src/Bus.h @@ -4,6 +4,7 @@ #include #include #include +#include class Bus { public: @@ -15,25 +16,39 @@ public: void WriteX(uint64_t address, T value) { static_assert(std::is_unsigned_v, "T must be an unsigned int of any size smaller than 8 bytes!"); - T& loc = AccessX(address); - loc = value; + + switch(address) + { + case 0x00008000 ... 0x000FFFFF: + { + uint64_t offset = address - 0x00008000; + std::memcpy(&m_RAM[offset], &value, sizeof(T)); + break; + } + default: + throw std::runtime_error("Illegal Access!"); + } } template - T& AccessX(uint64_t address) + T AccessX(uint64_t address) { static_assert(std::is_unsigned_v, "T must be an unsigned int of any size smaller than 8 bytes!"); std::cout << "Bus access: " << std::hex << address << std::endl; - - if (address >= 0x00008000 && address <= 0x000FFFFF) + + switch(address) + { + case 0x00008000 ... 
0x000FFFFF: { uint64_t offset = address - 0x00008000; - T* entry = reinterpret_cast(&m_RAM[offset]); - return entry[0]; + T value; + std::memcpy(&value, &m_RAM[offset], sizeof(T)); + return value; + } + default: + throw std::runtime_error("Illegal Access!"); } - - throw std::runtime_error("Illegal Access!"); } private: diff --git a/src/CPU.cpp b/src/CPU.cpp index 09990f4..b7e973c 100644 --- a/src/CPU.cpp +++ b/src/CPU.cpp @@ -5,8 +5,9 @@ #include #include #include +#include -CPU::CPU(std::shared_ptr bus) : m_Bus(bus), m_Context({m_Instruction, m_InstructionPointer, m_Flags, m_Registers, m_Bus}){ +CPU::CPU(std::shared_ptr bus) : m_Bus(bus), m_IsHalted(false), m_Context({m_Instruction, m_InstructionPointer, m_Flags, m_Registers, m_Bus, m_IsHalted}) { m_InstructionPointer = 0x00008000; for(int i = 0; i < 16; i++) @@ -15,48 +16,39 @@ CPU::CPU(std::shared_ptr bus) : m_Bus(bus), m_Context({m_Instruction, m_Ins } } -void CPU::Step(){ +void CPU::Step() { FetchDecode(); Execute(); } -void CPU::FetchDecode(){ - std::cout << "Fetching instruction: " << std::hex << m_InstructionPointer << std::endl; - m_InstructionRaw = m_Bus->AccessX(m_InstructionPointer); // Slice of 8 bytes. +void CPU::FetchDecode() { + uint8_t opcode_raw = m_Bus->AccessX(m_InstructionPointer); + Opcode opcode = static_cast(opcode_raw); - std::cout << "Context window fetched: " << std::hex << m_InstructionRaw << std::endl; // Start Decode Instruction - uint64_t first_byte = m_InstructionRaw & 0xFF; - std::cout << "Decoded first byte: " << std::hex << first_byte << std::endl; - Opcode opcode = static_cast(first_byte); - - if(first_byte >= 0xB8 && first_byte <= 0xBF) - { - m_Instruction.m_Operand1 = first_byte - 0xB8; - opcode = Opcode::MOV_R_IMM32; - } - - switch(opcode){ - case Opcode::NOP: m_Instruction.m_Length = 1; break; - case Opcode::HLT: m_Instruction.m_Length = 1; break; - case Opcode::MOV_R_IMM32: + switch(opcode_raw) { + case Opcode::MOV_R_IMM32 ... 
0xBF: // 0xB8 to 0xBF + m_Instruction.m_Opcode = Opcode::MOV_R_IMM32; + m_Instruction.m_Operand1 = opcode_raw - 0xB8; m_Instruction.m_Operand2 = m_Bus->AccessX(m_InstructionPointer + 1); m_Instruction.m_Length = 5; break; - case Opcode::ADD_RM32_R32: - m_Instruction.m_Operand1 = m_Bus->AccessX(m_InstructionPointer + 1); - m_Instruction.m_Operand2 = m_Bus->AccessX(m_InstructionPointer + 2); - m_Instruction.m_Length = 6; + case Opcode::NOP: + case Opcode::HLT: + m_Instruction.m_Opcode = opcode; + m_Instruction.m_Length = 1; break; - default: - std::runtime_error("Decode encountered unexpected opcode."); + case Opcode::ADD_RM32_R32: + m_Instruction.m_Opcode = opcode; + m_Instruction.optional.m_ModRM = x86::process_modrm(m_Bus->AccessX(m_InstructionPointer + 1)); + m_Instruction.m_Length = 2; + FetchModRMFields(); break; } - m_Instruction.m_Opcode = opcode; m_InstructionPointer += m_Instruction.m_Length; } -void CPU::Execute(){ +void CPU::Execute() { std::cout << "Executing... \n"; uint8_t opcode_value = static_cast(m_Instruction.m_Opcode); auto& exec_table = GetExecutorTable(); @@ -67,3 +59,24 @@ void CPU::Execute(){ } throw std::runtime_error("Opcode not found!"); } + +void CPU::FetchModRMFields() { + assert(m_Instruction.m_Length != 0); // FetchDecode() must set m_Length before calling FetchModRMFields() + x86::ModRMState state = m_Instruction.optional.m_ModRM.m_State; + switch(state) { + case x86::ModRMState::LR: + case x86::ModRMState::R: + break; + case x86::ModRMState::DISP32: + case x86::ModRMState::LR_DISP32: + m_Instruction.m_Operand1 = m_Bus->AccessX(m_InstructionPointer + m_Instruction.m_Length); + m_Instruction.m_Length += 4; + break; + case x86::ModRMState::LR_DISP8: + m_Instruction.m_Operand1 = m_Bus->AccessX(m_InstructionPointer + m_Instruction.m_Length); + m_Instruction.m_Length += 1; + break; + default: + throw std::runtime_error("Instruction could not be modified according to the modrm field!"); + } +} diff --git a/src/CPU.h b/src/CPU.h index 
e01a36b..7a0db13 100644 --- a/src/CPU.h +++ b/src/CPU.h @@ -20,14 +20,18 @@ private: void Execute(); private: - uint64_t m_Registers[16]; - uint64_t m_InstructionPointer; - uint64_t m_Flags; - + uint32_t m_Registers[16]; + uint32_t m_InstructionPointer; + uint32_t m_Flags; + bool m_IsHalted; + std::shared_ptr m_Bus; uint64_t m_InstructionRaw; Instruction m_Instruction; CPUContext m_Context; +private: + // FetchDecode() must set m_Length before calling FetchModRMFields() + void FetchModRMFields(); }; diff --git a/src/ExecutorCases.cpp b/src/ExecutorCases.cpp index f05d330..d1446c4 100644 --- a/src/ExecutorCases.cpp +++ b/src/ExecutorCases.cpp @@ -6,41 +6,11 @@ #include #include -CPUContext::CPUContext(Instruction& i, uint64_t& ip, uint64_t& flags, uint64_t* reg, std::shared_ptr& bus) : m_Instruction(i), m_InstructionPointer(ip), m_Flags(flags), m_Registers(reg), m_Bus(bus) { } +CPUContext::CPUContext(Instruction& i, uint32_t& ip, uint32_t& flags, uint32_t* reg, std::shared_ptr& bus, bool& isHalted) : m_Instruction(i), m_InstructionPointer(ip), m_Flags(flags), m_Registers(reg), m_Bus(bus), m_IsHalted(isHalted) { } CPUContext::~CPUContext() = default; // NO SIB SUPPORT YET -ModRM process_modrm(uint8_t modrm){ - uint8_t mod_mask = 0b11000000; - uint8_t reg_mask = 0b00111000; - uint8_t rm_mask = 0b00000111; - - uint8_t mod = modrm & mod_mask; - uint8_t reg = (modrm & reg_mask) >> 3; - uint8_t rm = modrm & rm_mask; - - ModRMState state = ModRMState::INVALID; - - switch(mod) { - case 0b00000000: - state = ModRMState::LR; - break; - case 0b01000000: - state = ModRMState::LR_DISP8; - break; - case 0b10000000: - state = ModRMState::LR_DISP32; - break; - case 0b11000000: - state = ModRMState::R; - break; - default: - throw std::runtime_error("Mod R/M does not support non-register operands right now!"); - } - return {.m_State = state, .m_Reg = reg, .m_Rm = rm}; -} - namespace executor_cases { void Nop(CPUContext& cc){ std::cout << "No op" << std::endl; @@ -53,32 +23,33 @@ 
namespace executor_cases { void Mov_r32_imm32(CPUContext& cc){ cc.m_Registers[cc.m_Instruction.m_Operand1] = cc.m_Instruction.m_Operand2; - std::cout << "Contents of " << x86::Register2Str((x86::Register)cc.m_Instruction.m_Operand1) << " changed to " << cc.m_Registers[cc.m_Instruction.m_Operand1] << std::endl; } - //NO SIB SUPPORT YET void Add_rm32_r32(CPUContext& cc){ - ModRM modrm = process_modrm(cc.m_Instruction.m_Operand1); + x86::ModRM modrm = cc.m_Instruction.optional.m_ModRM; switch(modrm.m_State) { - case ModRMState::R: + case x86::ModRMState::R: { cc.m_Registers[modrm.m_Rm] += cc.m_Registers[modrm.m_Reg]; - std::cout << "Adding " << x86::Register2Str((x86::Register)modrm.m_Reg) << " to " << x86::Register2Str((x86::Register)modrm.m_Rm) << std::endl; break; } - case ModRMState::LR: + case x86::ModRMState::LR: { uint32_t dstPrevValue = cc.m_Bus->AccessX(cc.m_Registers[modrm.m_Rm]); uint32_t currRegValue = cc.m_Registers[modrm.m_Reg]; uint32_t result = dstPrevValue + currRegValue; cc.m_Bus->WriteX(cc.m_Registers[modrm.m_Rm], result); - std::cout << "Memory address " << std::hex << cc.m_Registers[modrm.m_Rm] << " modified to: " << result << std::endl; break; } - case ModRMState::LR_DISP8: + case x86::ModRMState::LR_DISP8: { - + uint32_t dstAddress = cc.m_Registers[modrm.m_Rm] + cc.m_Instruction.m_Operand2; + uint32_t dstPrevValue = cc.m_Bus->AccessX(dstAddress); + uint32_t currRegValue = cc.m_Registers[modrm.m_Reg]; + uint32_t result = dstPrevValue + currRegValue; + cc.m_Bus->WriteX(dstAddress, result); + break; } default: { diff --git a/src/ExecutorCases.h b/src/ExecutorCases.h index 37b4b3e..92a13f7 100644 --- a/src/ExecutorCases.h +++ b/src/ExecutorCases.h @@ -9,30 +9,16 @@ class Bus; struct CPUContext { Instruction& m_Instruction; - uint64_t& m_InstructionPointer; - uint64_t& m_Flags; - uint64_t* m_Registers; + uint32_t& m_InstructionPointer; + uint32_t& m_Flags; + uint32_t* m_Registers; std::shared_ptr m_Bus; - - CPUContext(Instruction& i, uint64_t& ip, 
uint64_t& flags, uint64_t* reg, std::shared_ptr& bus); + bool& m_IsHalted; + + CPUContext(Instruction& i, uint32_t& ip, uint32_t& flags, uint32_t* reg, std::shared_ptr& bus, bool& isHalted); ~CPUContext(); }; -enum class ModRMState : uint8_t -{ - INVALID = 0, - LR = 1, - LR_DISP8 = 2, - LR_DISP32 = 3, - R = 4 -}; - -struct ModRM{ - ModRMState m_State; - uint8_t m_Reg; - uint8_t m_Rm; -}; - typedef void (*ExecutorCase)(CPUContext&); const std::array& GetExecutorTable(); diff --git a/src/Instruction.cpp b/src/Instruction.cpp index 7c7d9c0..0dd8ee4 100644 --- a/src/Instruction.cpp +++ b/src/Instruction.cpp @@ -16,4 +16,47 @@ namespace x86 { } throw std::runtime_error("Register not found!"); } + // Does not support SIB yet! + ModRM process_modrm(uint8_t modrm){ + uint8_t mod_mask = 0b11000000; + uint8_t reg_mask = 0b00111000; + uint8_t rm_mask = 0b00000111; + + uint8_t mod = modrm & mod_mask; + uint8_t reg = (modrm & reg_mask) >> 3; + uint8_t rm = modrm & rm_mask; + + ModRMState state = ModRMState::INVALID; + + switch(mod) { + case 0b00000000: + state = ModRMState::LR; + if(reg == 0b00000101) + state = ModRMState::DISP32; + break; + case 0b01000000: + state = ModRMState::LR_DISP8; + break; + case 0b10000000: + state = ModRMState::LR_DISP32; + break; + case 0b11000000: + state = ModRMState::R; + break; + default: + throw std::runtime_error("Mod R/M does not support non-register operands right now!"); + } + + return {.m_State = state, .m_Reg = reg, .m_Rm = rm}; + } +} + +std::string Opcode2Str(Opcode op) { + switch(op) { + case Opcode::ADD_RM32_R32: return "ADD_RM32_R32"; + case Opcode::MOV_R_IMM32: return "MOV_R_IMM32"; + case Opcode::NOP: return "NOP"; + case Opcode::HLT: return "HLT"; + } + throw std::runtime_error("Opcode Invalid!"); } diff --git a/src/Instruction.h b/src/Instruction.h index cf4fbdc..27fd774 100644 --- a/src/Instruction.h +++ b/src/Instruction.h @@ -3,12 +3,33 @@ #include #include +#include "ExecutorCases.h" + namespace x86 { enum Register : uint8_t 
{ EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7 }; - + + enum class ModRMState : uint8_t + { + INVALID = 0, + LR = 1, + LR_DISP8 = 2, + LR_DISP32 = 3, + DISP32 = 4, + R = 5 + }; + + struct ModRM{ + ModRMState m_State; + uint8_t m_Reg; + uint8_t m_Rm; + }; + + ModRM process_modrm(uint8_t modrm); + + // Helpers std::string Register2Str(x86::Register reg); } @@ -19,9 +40,14 @@ enum Opcode : uint8_t { ADD_RM32_R32 = 0x01, }; +std::string Opcode2Str(Opcode op); + struct Instruction{ Opcode m_Opcode; size_t m_Length; - uint64_t m_Operand1; - uint64_t m_Operand2; + uint32_t m_Operand1; + uint32_t m_Operand2; + union { + x86::ModRM m_ModRM; + } optional; }; diff --git a/src/Metal.cpp b/src/Metal.cpp index 229c1e8..6b606ef 100644 --- a/src/Metal.cpp +++ b/src/Metal.cpp @@ -5,10 +5,10 @@ Metal::Metal() : m_Bus(std::make_shared()), m_CPU(m_Bus) { } void Metal::Upload2Memory(uint8_t bytes[], size_t len) { - uint64_t start = 0x8000; + uint64_t start = 0x00008000; for (size_t i = 0; i < len; i++) { - m_Bus->WriteX(start + i, bytes[i]); - std::cout << "Written " << bytes[i] << " to " << std::hex << start + i << std::endl; + m_Bus->WriteX(start + i, bytes[i]); + std::cout << "Written " << std::hex << bytes[i] << " to " << std::hex << start + i << std::endl; } } diff --git a/src/Metal.h b/src/Metal.h index e5405fa..6b79c60 100644 --- a/src/Metal.h +++ b/src/Metal.h @@ -13,7 +13,6 @@ class Metal { ~Metal() = default; void Upload2Memory(uint8_t bytes[], size_t len); - void Run(); private: diff --git a/src/main.cpp b/src/main.cpp index 537eea4..cdc32ff 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -2,13 +2,19 @@ #include "Metal.h" +// mov eax, 0xF4 +// mov ecx, 0x800c +// add [ecx], eax uint8_t test[] = { - 0x90, 0xF4 + 0xB8, 0xF4, 0x00, 0x00, 0x00, + 0xB9, 0x0c, 0x80, 0x00, 0x00, + 0x01, 0x01, + 0x00, }; int main(int argc, char** argv) { Metal metal; - metal.Upload2Memory(test, 2); + metal.Upload2Memory(test, 13); metal.Run(); return 0; }