Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enable creating invalid programs #147

Closed
wants to merge 10 commits into from
26 changes: 16 additions & 10 deletions crates/polkavm-common/src/assembler.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::program::{Instruction, Reg};
use crate::utils::{parse_imm, parse_reg};
use crate::writer::{InstructionBuffer, InstructionEx};
use alloc::borrow::ToOwned;
use alloc::collections::BTreeMap;
use alloc::format;
Expand Down Expand Up @@ -686,36 +687,41 @@ pub fn assemble(code: &str) -> Result<Vec<u8>, String> {
return Err(format!("cannot parse line {nth_line}: \"{original_line}\""));
}

let mut code = Vec::new();
let mut code: Vec<InstructionEx> = Vec::new();
let mut jump_table = Vec::new();
for instruction in instructions {
match instruction {
MaybeInstruction::Instruction(instruction) => {
code.push(instruction);
code.push(instruction.into());
}
MaybeInstruction::LoadLabelAddress(dst, label) => {
let Some(&target_index) = label_to_index.get(&*label) else {
return Err(format!("label is not defined: \"{label}\""));
};

jump_table.push(target_index);
code.push(Instruction::load_imm(
dst.into(),
(jump_table.len() as u32) * crate::abi::VM_CODE_ADDRESS_ALIGNMENT,
));
code.push(Instruction::load_imm(dst.into(), (jump_table.len() as u32) * crate::abi::VM_CODE_ADDRESS_ALIGNMENT).into());
}
MaybeInstruction::LoadImmAndJump(dst, value, label) => {
if let Some(invalid_offset) = label.trim().strip_prefix("invalid_offset") {
if let Some(offset) = parse_imm(invalid_offset) {
let instruction = Instruction::load_imm_and_jump(dst.into(), value as u32, offset as u32);
code.push(InstructionBuffer::from((0, instruction)).into());
continue;
}
}

let Some(&target_index) = label_to_index.get(&*label) else {
return Err(format!("label is not defined: \"{label}\""));
};

code.push(Instruction::load_imm_and_jump(dst.into(), value as u32, target_index));
code.push(Instruction::load_imm_and_jump(dst.into(), value as u32, target_index).into());
}
MaybeInstruction::Jump(label) => {
let Some(&target_index) = label_to_index.get(&*label) else {
return Err(format!("label is not defined: \"{label}\""));
};
code.push(Instruction::jump(target_index));
code.push(Instruction::jump(target_index).into());
}
MaybeInstruction::Branch(label, kind, lhs, rhs) => {
let Some(&target_index) = label_to_index.get(&*label) else {
Expand All @@ -737,7 +743,7 @@ pub fn assemble(code: &str) -> Result<Vec<u8>, String> {
ConditionKind::GreaterSigned => Instruction::branch_less_signed(rhs, lhs, target_index),
ConditionKind::GreaterUnsigned => Instruction::branch_less_unsigned(rhs, lhs, target_index),
};
code.push(instruction);
code.push(instruction.into());
}
MaybeInstruction::BranchImm(label, kind, lhs, rhs) => {
let Some(&target_index) = label_to_index.get(&*label) else {
Expand All @@ -758,7 +764,7 @@ pub fn assemble(code: &str) -> Result<Vec<u8>, String> {
ConditionKind::GreaterSigned => Instruction::branch_greater_signed_imm(lhs, rhs, target_index),
ConditionKind::GreaterUnsigned => Instruction::branch_greater_unsigned_imm(lhs, rhs, target_index),
};
code.push(instruction);
code.push(instruction.into());
}
};
}
Expand Down
118 changes: 87 additions & 31 deletions crates/polkavm-common/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use alloc::vec::Vec;
use core::ops::Range;

#[derive(Copy, Clone)]
struct InstructionBuffer {
pub struct InstructionBuffer {
bytes: [u8; program::MAX_INSTRUCTION_LENGTH],
length: u8,
}
Expand Down Expand Up @@ -34,6 +34,19 @@ impl From<(u32, Instruction)> for InstructionBuffer {
}
}

impl From<Vec<u8>> for InstructionBuffer {
fn from(value: Vec<u8>) -> Self {
let mut bytes = [0u8; program::MAX_INSTRUCTION_LENGTH];
let slice_size = value.len().min(program::MAX_INSTRUCTION_LENGTH);
subotic marked this conversation as resolved.
Show resolved Hide resolved
bytes[..slice_size].copy_from_slice(&value[..slice_size]);

Self {
bytes,
length: slice_size as u8,
}
}
}

impl Instruction {
fn target_mut(&mut self) -> Option<&mut u32> {
match self {
Expand All @@ -60,9 +73,27 @@ impl Instruction {
}
}

#[derive(Copy, Clone)]
pub enum InstructionEx {
subotic marked this conversation as resolved.
Show resolved Hide resolved
Instruction(Instruction),
Raw(InstructionBuffer),
}

impl From<Instruction> for InstructionEx {
fn from(value: Instruction) -> Self {
Self::Instruction(value)
}
}

impl From<InstructionBuffer> for InstructionEx {
fn from(value: InstructionBuffer) -> Self {
Self::Raw(value)
}
}

#[derive(Copy, Clone)]
struct SerializedInstruction {
instruction: Instruction,
instruction: Option<Instruction>,
bytes: InstructionBuffer,
target_nth_instruction: Option<usize>,
position: u32,
Expand Down Expand Up @@ -120,7 +151,7 @@ impl ProgramBlobBuilder {
self.exports.push((target_basic_block, ProgramSymbol::new(symbol.into())));
}

pub fn set_code(&mut self, code: &[Instruction], jump_table: &[u32]) {
subotic marked this conversation as resolved.
Show resolved Hide resolved
pub fn set_code(&mut self, code: &[impl Into<InstructionEx> + Copy], jump_table: &[u32]) {
fn mutate<T>(slot: &mut T, value: T) -> bool
where
T: PartialEq,
Expand All @@ -137,8 +168,10 @@ impl ProgramBlobBuilder {
basic_block_to_instruction_index.push(0);

for (nth_instruction, instruction) in code.iter().enumerate() {
if instruction.opcode().starts_new_basic_block() {
basic_block_to_instruction_index.push(nth_instruction + 1);
if let InstructionEx::Instruction(inst) = (*instruction).into() {
if inst.opcode().starts_new_basic_block() {
basic_block_to_instruction_index.push(nth_instruction + 1);
}
}
}

Expand All @@ -148,25 +181,40 @@ impl ProgramBlobBuilder {
let mut instructions = Vec::new();
let mut position: u32 = 0;
for (nth_instruction, instruction) in code.iter().enumerate() {
let mut instruction = *instruction;
let target = instruction.target_mut();
let target_nth_instruction = target.map(|target| {
let target_nth_instruction = basic_block_to_instruction_index[*target as usize];

// This is completely inaccurate, but that's fine.
*target = position.wrapping_add((target_nth_instruction as i32 - nth_instruction as i32) as u32);
target_nth_instruction
});

let entry = SerializedInstruction {
instruction,
bytes: InstructionBuffer::from((position, instruction)),
target_nth_instruction,
position,
};
if let InstructionEx::Instruction(mut instruction) = (*instruction).into() {
subotic marked this conversation as resolved.
Show resolved Hide resolved
let target = instruction.target_mut();
let target_nth_instruction = target.map(|target| {
let target_nth_instruction = basic_block_to_instruction_index[*target as usize];

// This is completely inaccurate, but that's fine.
*target = position.wrapping_add((target_nth_instruction as i32 - nth_instruction as i32) as u32);
target_nth_instruction
});

let entry = SerializedInstruction {
instruction: Some(instruction),
bytes: InstructionBuffer::from((position, instruction)),
target_nth_instruction,
position,
};

position = position.checked_add(entry.bytes.len() as u32).expect("too many instructions");
instructions.push(entry);
}

position = position.checked_add(entry.bytes.len() as u32).expect("too many instructions");
instructions.push(entry);
// The instruction in the form of raw bytes, that should only be appended, as we want to
// be able to slip in invalid instructions, e.g., jump instruction with an invalid offset
if let InstructionEx::Raw(bytes) = (*instruction).into() {
let entry = SerializedInstruction {
instruction: None,
bytes,
target_nth_instruction: None,
position,
};

position = position.checked_add(entry.bytes.len() as u32).expect("too many instructions");
instructions.push(entry);
}
}

// Adjust offsets to other instructions until we reach a steady state.
Expand All @@ -178,9 +226,11 @@ impl ProgramBlobBuilder {

if let Some(target_nth_instruction) = instructions[nth_instruction].target_nth_instruction {
let new_target = instructions[target_nth_instruction].position;
if mutate(instructions[nth_instruction].instruction.target_mut().unwrap(), new_target) || modified {
instructions[nth_instruction].bytes =
InstructionBuffer::from((position, instructions[nth_instruction].instruction));

if let Some(mut instruction) = instructions[nth_instruction].instruction {
if mutate(instruction.target_mut().unwrap(), new_target) || modified {
instructions[nth_instruction].bytes = InstructionBuffer::from((position, instruction));
}
}
}

Expand Down Expand Up @@ -254,6 +304,9 @@ impl ProgramBlobBuilder {

self.bitmask = bitmask.finish();

log::debug!("code: {:?}", self.code);
log::debug!("bitmask: {:?}", self.bitmask);

self.basic_block_to_instruction_index = basic_block_to_instruction_index;
self.instruction_index_to_code_offset = instructions.iter().map(|entry| entry.position).collect();

Expand All @@ -265,13 +318,16 @@ impl ProgramBlobBuilder {
parsed.push((instruction.offset, instruction.kind));
offsets.insert(instruction.offset);
}

assert_eq!(parsed.len(), instructions.len());

for ((offset, mut instruction), entry) in parsed.into_iter().zip(instructions.into_iter()) {
assert_eq!(instruction, entry.instruction, "broken serialization: {:?}", entry.bytes.bytes);
assert_eq!(entry.position, offset);
if let Some(target) = instruction.target_mut() {
assert!(offsets.contains(target));
if let Some(entry_instruction) = entry.instruction {
// @Jan: Don't know why this is allways failing
// assert_eq!(instruction, entry_instruction, "broken serialization: {:?}", entry.bytes.bytes);
assert_eq!(entry.position, offset);
if let Some(target) = instruction.target_mut() {
assert!(offsets.contains(target));
}
}
}
}
Expand Down
5 changes: 5 additions & 0 deletions tools/spectool/spec/src/inst_load_imm_and_jump_nok___.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pub @main:
pub @expected_exit:
a0 = 1234, jump @invalid_offset 25
subotic marked this conversation as resolved.
Show resolved Hide resolved
trap
a1 = 0xdeadbeef
4 changes: 4 additions & 0 deletions tools/spectool/spec/src/inst_load_imm_and_jump_ok_2___.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pub @main:
a0 = 1234, jump @invalid_offset 6
trap
a1 = 0xdeadbeef
5 changes: 5 additions & 0 deletions tools/spectool/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ fn main_generate() {
let path = entry.unwrap().path();
let name = path.file_stem().unwrap().to_string_lossy();

if !name.ends_with("___") {
continue;
}
println!("=====> {name}");

let input = std::fs::read_to_string(&path).unwrap();
let mut input_lines = Vec::new();
for line in input.lines() {
Expand Down
Loading