diff --git a/Cargo.lock b/Cargo.lock index 665a9cc..35b1621 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -224,8 +224,8 @@ dependencies = [ ] [[package]] -name = "creator-compiler" -version = "1.1.0" +name = "creator-assembler" +version = "2.0.0" dependencies = [ "ansi-to-html", "ariadne", diff --git a/Cargo.toml b/Cargo.toml index 8492181..1584002 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,9 @@ [package] -name = "creator-compiler" -version = "1.1.0" +name = "creator-assembler" +version = "2.0.0" edition = "2021" license = "LGPL-2.1-or-later" -description = "A reimplementation of Creator (https://creatorsim.github.io/)'s compiler" +description = "A reimplementation of Creator (https://creatorsim.github.io/)'s assembler" categories = ["command-line-utilities", "compilers"] keywords = ["compiler", "assembly", "assembler", "Creator"] rust-version = "1.89" @@ -17,7 +17,7 @@ crate-type = [ ] [[bin]] -name = "creator-compiler" +name = "creator-assembler" required-features = ["cli"] [features] diff --git a/README.md b/README.md index 8f22380..0e5694a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# Creator Compiler +# CREATOR Assembler -Reimplementation of the compiler used by [Creator](https://creatorsim.github.io/) +Reimplementation of the assembler used by [CREATOR](https://creatorsim.github.io/) to have better performance, more helpful error messages, and a more correct output. ## Building @@ -11,20 +11,20 @@ The only requirement is the rust toolchain, which can be installed through [`rus ### Running locally (CLI) -The compiler can be built from source using `cargo build --release`, which will -place the binary in `./target/release/creator-compiler`. The `--release` flag can +The assembler can be built from source using `cargo build --release`, which will +place the binary in `./target/release/creator-assembler`. The `--release` flag can be omitted to generate debug binaries. 
Additionally, `cargo run --release -- []` can be used as a shortcut to build and run the binary. Running the application without arguments provides a short description of the application and subcommands, -and using `creator-compiler help ` provides a description and usage +and using `creator-assembler help ` provides a description and usage instructions for each command. -The compiler currently supports 3 modes of execution: +The assembler currently supports 3 modes of execution: -- Print architecture specification schema to `stdout`: `creator-compiler schema` -- Validate architecture specification file: `creator-compiler validate ` +- Print architecture specification schema to `stdout`: `creator-assembler schema` +- Validate architecture specification file: `creator-assembler validate ` - Compile assembly input and print the result to `stdout`: - `creator-compiler compile ` + `creator-assembler compile ` - The `-v`/`--verbose` flag can be used to also print the parsed AST ### JS Bindings @@ -46,4 +46,4 @@ use the generated package, both for the web and Node.js: which allows loading the page at `localhost:8080/js_example`. 
- `web.js`: main module for the web example, shows how to load the package in the web - `node.js`: main module for the Node.js example, shows how to load the package in Node.js -- `compiler.mjs`: module responsible for interaction with the package, shows how to use the provided API +- `assembler.mjs`: module responsible for interaction with the package, shows how to use the provided API diff --git a/benches/arch.json b/benches/arch.json index c33e3b7..ee9041d 100644 --- a/benches/arch.json +++ b/benches/arch.json @@ -1,42 +1,15 @@ { - "arch_conf": [ - { - "name": "Name", - "value": "Test" - }, - { - "name": "Bits", - "value": "32" - }, - { - "name": "Description", - "value": "Test architecture" - }, - { - "name": "Data Format", - "value": "big_endian" - }, - { - "name": "Memory Alignment", - "value": "1" - }, - { - "name": "Main Function", - "value": "main" - }, - { - "name": "Passing Convention", - "value": "1" - }, - { - "name": "Sensitive Register Name", - "value": "1" - }, - { - "name": "CommentPrefix", - "value": "#" - } - ], + "config": { + "name": "Test", + "word_size": 32, + "description": "Test architecture", + "endianness": "big_endian", + "memory_alignment": true, + "main_function": "main", + "passing_convention": true, + "sensitive_register_name": true, + "comment_prefix": "#" + }, "components": [ { "name": "Control registers", @@ -44,28 +17,25 @@ "double_precision": false, "elements": [ { - "name": [ - "PC" - ], - "nbits": "32", + "name": ["PC"], + "nbits": 32, + "encoding": 0, "value": 0, "default_value": 0, "properties": [] }, { - "name": [ - "ctrl1" - ], - "nbits": "32", + "name": ["ctrl1"], + "nbits": 32, + "encoding": 1, "value": 0, "default_value": 0, "properties": [] }, { - "name": [ - "ctrl2" - ], - "nbits": "32", + "name": ["ctrl2"], + "nbits": 32, + "encoding": 2, "value": 0, "default_value": 0, "properties": [] @@ -78,31 +48,25 @@ "double_precision": false, "elements": [ { - "name": [ - "x0" - ], - "nbits": "32", + "name": ["x0"], + "nbits": 
32, + "encoding": 0, "value": 0, "default_value": 0, "properties": [] }, { - "name": [ - "x1", - "one" - ], - "nbits": "32", + "name": ["x1", "one"], + "nbits": 32, + "encoding": 1, "value": 0, "default_value": 0, "properties": [] }, { - "name": [ - "x2", - "two", - "2" - ], - "nbits": "32", + "name": ["x2", "two", "2"], + "nbits": 32, + "encoding": 2, "value": 0, "default_value": 0, "properties": [] @@ -113,33 +77,27 @@ "name": "Floating point registers", "type": "fp_registers", "double_precision": true, - "double_precision_type": "extended", "elements": [ { - "name": [ - "ft0" - ], - "nbits": "64", + "name": ["ft0"], + "nbits": 64, + "encoding": 0, "value": 0.0, "default_value": 0.0, "properties": [] }, { - "name": [ - "F1", - "ft1" - ], - "nbits": "64", + "name": ["F1", "ft1"], + "nbits": 64, + "encoding": 1, "value": 0.0, "default_value": 0.0, "properties": [] }, { - "name": [ - "Field2", - "ft2" - ], - "nbits": "64", + "name": ["Field2", "ft2"], + "nbits": 64, + "encoding": 2, "value": 0.0, "default_value": 0.0, "properties": [] @@ -152,77 +110,27 @@ "double_precision": false, "elements": [ { - "name": [ - "fs0" - ], - "nbits": "32", - "value": 0.0, - "default_value": 0.0, - "properties": [] - }, - { - "name": [ - "fs1" - ], - "nbits": "32", - "value": 0.0, - "default_value": 0.0, - "properties": [] - }, - { - "name": [ - "fs2" - ], - "nbits": "32", - "value": 0.0, - "default_value": 0.0, - "properties": [] - } - ] - }, - { - "name": "Double floating point registers", - "type": "fp_registers", - "double_precision": true, - "double_precision_type": "linked", - "elements": [ - { - "name": [ - "FD0" - ], - "nbits": "64", + "name": ["fs0"], + "nbits": 32, + "encoding": 0, "value": 0.0, "default_value": 0.0, - "simple_reg": [ - "fs0", - "fs1" - ], "properties": [] }, { - "name": [ - "FD1" - ], - "nbits": "64", + "name": ["fs1"], + "nbits": 32, + "encoding": 1, "value": 0.0, "default_value": 0.0, - "simple_reg": [ - "fs1", - "fs2" - ], "properties": [] }, { - 
"name": [ - "FD2" - ], - "nbits": "64", + "name": ["fs2"], + "nbits": 32, + "encoding": 2, "value": 0.0, "default_value": 0.0, - "simple_reg": [ - "fs2", - "fs3" - ], "properties": [] } ] @@ -233,8 +141,6 @@ "name": "nop", "type": "Arithmetic integer", "signature_definition": "F0", - "signature": "nop", - "signatureRaw": "nop", "co": "1111111", "cop": "0000000000", "nwords": 1, @@ -251,19 +157,15 @@ "type": "cop", "startbit": 31, "stopbit": 28, - "valueField": "1111" + "value": "1111" } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "nop2", "type": "Arithmetic integer", "signature_definition": "F0", - "signature": "nop2", - "signatureRaw": "nop2", "co": "1000001", "cop": "0000000000", "nwords": 2, @@ -280,19 +182,15 @@ "type": "cop", "startbit": 63, "stopbit": 60, - "valueField": "1001" + "value": "1001" } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "imm", "type": "Arithmetic integer", - "signature_definition": "F0 F1 F2 F3", - "signature": "inm,inm-signed,inm-unsigned,address", - "signatureRaw": "inm inms inmu addr", + "signature_definition": "F0 F1, F2, F3", "co": "0000000", "cop": "0000000000", "nwords": 1, @@ -305,14 +203,14 @@ "stopbit": 0 }, { - "name": "inms", - "type": "inm-signed", + "name": "imms", + "type": "imm-signed", "startbit": 29, "stopbit": 26 }, { - "name": "inmu", - "type": "inm-unsigned", + "name": "immu", + "type": "imm-unsigned", "startbit": 8, "stopbit": 1 }, @@ -323,16 +221,12 @@ "stopbit": 10 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "reg", "type": "Arithmetic integer", - "signature_definition": "F0 F1 F2 F3 F4", - "signature": "reg,Ctrl-Reg,INT-Reg,SFP-Reg,DFP-Reg", - "signatureRaw": "reg ctrl int sfp dfp", + "signature_definition": "F0 F1, F2, F3, F4", "co": "0000000", "cop": "0000000000", "nwords": 1, @@ -369,16 +263,12 @@ "stopbit": 0 } ], - "definition": "", - "separated": [], - "help": "" + "definition": 
"" }, { "name": "off", "type": "Arithmetic integer", - "signature_definition": "F0 F1 F2", - "signature": "off,offset_bytes,offset_words", - "signatureRaw": "off bytes words", + "signature_definition": "F0 F1, F2", "co": "0000000", "cop": "0000000000", "nwords": 1, @@ -403,16 +293,12 @@ "stopbit": 0 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "multi", "type": "Arithmetic integer", "signature_definition": "F0 F1", - "signature": "multi,inm-unsigned", - "signatureRaw": "multi imm4", "co": "1110011", "cop": "0000000000", "nwords": 1, @@ -426,21 +312,17 @@ }, { "name": "imm4", - "type": "inm-unsigned", + "type": "imm-unsigned", "startbit": 31, "stopbit": 28 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "multi", "type": "Arithmetic integer", "signature_definition": "F0 $", - "signature": "multi,$", - "signatureRaw": "multi $", "co": "1011101", "cop": "0000000000", "nwords": 1, @@ -453,16 +335,12 @@ "stopbit": 0 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "multi", "type": "Arithmetic integer", "signature_definition": "F0 F1", - "signature": "multi,inm-unsigned", - "signatureRaw": "multi imm5", "co": "1000001", "cop": "0000000000", "nwords": 1, @@ -476,21 +354,17 @@ }, { "name": "imm5", - "type": "inm-unsigned", + "type": "imm-unsigned", "startbit": 31, "stopbit": 27 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "pad", "type": "Arithmetic integer", - "signature_definition": "F0 F1 F2", - "signature": "multi,inm-signed,inm-signed", - "signatureRaw": "multi imm5 imm6", + "signature_definition": "F0 F1, F2", "co": "1111101", "cop": "0000000000", "nwords": 1, @@ -504,29 +378,25 @@ }, { "name": "imm5", - "type": "inm-signed", + "type": "imm-signed", "startbit": 31, "stopbit": 29, "padding": 2 }, { "name": "imm6", - "type": "inm-signed", + "type": "imm-signed", "startbit": 5, "stopbit": 2, "padding": 2 } 
], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "enum", "type": "Arithmetic integer", - "signature_definition": "F0 F1 F2 F3 F4", - "signature": "enum,enum,enum,enum,enum", - "signatureRaw": "enum enum1 enum2 enum3 enum4", + "signature_definition": "F0 F1, F2, F3, F4", "co": "1111111", "cop": "0000000000", "nwords": 1, @@ -567,16 +437,12 @@ "enum_name": "test" } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "lit", "type": "Arithmetic integer", - "signature_definition": "F0 F1a aF1 F1", - "signature": "lit,F1a,aF1,inm-signed", - "signatureRaw": "lit F1a aF1 imm", + "signature_definition": "F0 F1a, aF1, F1", "co": "1111000", "cop": "0000000000", "nwords": 1, @@ -590,21 +456,17 @@ }, { "name": "imm", - "type": "inm-signed", + "type": "imm-signed", "startbit": 3, "stopbit": 0 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "addi", "type": "Arithmetic integer", - "signature_definition": "F0 F3 F2 F1", - "signature": "addi,INT-Reg,INT-Reg,inm-signed", - "signatureRaw": "addi rd rs1 imm", + "signature_definition": "F0 F3, F2, F1", "co": "0010011", "cop": "000", "nwords": 1, @@ -618,7 +480,7 @@ }, { "name": "imm", - "type": "inm-signed", + "type": "imm-signed", "startbit": 31, "stopbit": 20 }, @@ -639,27 +501,22 @@ "type": "cop", "startbit": 14, "stopbit": 12, - "valueField": "000" + "value": "000" } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" } ], "pseudoinstructions": [ { "name": "pseudo", - "signature_definition": "pseudo F0 F1", - "signature": "pseudo,inm-unsigned,INT-Reg", - "signatureRaw": "pseudo imm reg", - "help": "", + "signature_definition": "pseudo F0, F1", "properties": [], "nwords": 1, "fields": [ { "name": "imm", - "type": "inm-unsigned" + "type": "imm-unsigned" }, { "name": "reg", @@ -693,7 +550,7 @@ { "name": ".zero", "action": "space", - "size": "1" + "size": 1 }, { "name": ".align", @@ -718,22 +575,22 @@ { 
"name": ".byte", "action": "byte", - "size": "1" + "size": 1 }, { "name": ".half", "action": "half_word", - "size": "2" + "size": 2 }, { "name": ".word", "action": "word", - "size": "4" + "size": 4 }, { "name": ".dword", "action": "double_word", - "size": "8" + "size": 8 }, { "name": ".float", @@ -751,32 +608,20 @@ "size": null } ], - "memory_layout": [ - { - "name": "text start", - "value": "0x00000000" - }, - { - "name": "text end", - "value": "0x0000FFFF" - }, - { - "name": "data start", - "value": "0x01000000" - }, - { - "name": "data end", - "value": "0x07FFFFFF" - }, - { - "name": "stack start", - "value": "0x0FFFFFFC" - }, - { - "name": "stack end", - "value": "0x0FFFFFFF" + "memory_layout": { + "text": { + "start": 0, + "end": 65535 + }, + "data": { + "start": 16777216, + "end": 134217727 + }, + "stack": { + "start": 268435452, + "end": 268435455 } - ], + }, "enums": { "enum1": { "a": 1, diff --git a/benches/benchmark.rs b/benches/benchmark.rs index 0809490..4bdb525 100644 --- a/benches/benchmark.rs +++ b/benches/benchmark.rs @@ -6,15 +6,15 @@ use std::collections::HashMap; use std::hint::black_box; use std::time::Duration; -use creator_compiler::parser::AST; -use creator_compiler::prelude::*; +use creator_assembler::parser::AST; +use creator_assembler::prelude::*; static ARCH_JSON: &str = include_str!("arch.json"); static CODE: &str = include_str!("sample.s"); static NAME: &str = "sample.s"; fn parse(arch: &Architecture) -> AST { - parser::parse(black_box(arch.arch_conf.comment_prefix), black_box(CODE)) + parser::parse(black_box(arch.config.comment_prefix), black_box(CODE)) .map_err(|e| eprintln!("{}", e.render(NAME, CODE, true))) .unwrap() } diff --git a/build.sh b/build.sh index 3de7f4a..5efd83f 100755 --- a/build.sh +++ b/build.sh @@ -32,7 +32,7 @@ function BuildFull() { function Help() { - printf "Builds the compiler for usage in WebAssembly + printf "Builds the assembler for usage in WebAssembly Usage: \`./build.sh \` diff --git 
a/js_example/compiler.mjs b/js_example/assembler.mjs similarity index 81% rename from js_example/compiler.mjs rename to js_example/assembler.mjs index 8355169..a5072ba 100644 --- a/js_example/compiler.mjs +++ b/js_example/assembler.mjs @@ -6,9 +6,9 @@ **/ /** - * @param {import("../pkg/web/creator_compiler.d.ts")} wasm + * @param {import("../pkg/web/creator_assembler.d.ts")} wasm * @param {string} json_arch - * @returns {import("../pkg/web/creator_compiler.d.ts").ArchitectureJS} + * @returns {import("../pkg/web/creator_assembler.d.ts").ArchitectureJS} **/ export function load(wasm, json_arch) { const arch = wasm.ArchitectureJS.from_json(json_arch); @@ -16,8 +16,8 @@ export function load(wasm, json_arch) { } /** - * @param {import("../pkg/web/creator_compiler.d.ts")} wasm - * @param {import("../pkg/web/creator_compiler.d.ts").DataCategoryJS} category + * @param {import("../pkg/web/creator_assembler.d.ts")} wasm + * @param {import("../pkg/web/creator_assembler.d.ts").DataCategoryJS} category * @returns {string} **/ function data_category(wasm, category) { @@ -30,8 +30,8 @@ function data_category(wasm, category) { } /** - * @param {import("../pkg/web/creator_compiler.d.ts")} wasm - * @param {import("../pkg/web/creator_compiler.d.ts").ArchitectureJS} arch + * @param {import("../pkg/web/creator_assembler.d.ts")} wasm + * @param {import("../pkg/web/creator_assembler.d.ts").ArchitectureJS} arch * @param {string} code * @returns {CompilationResult} **/ diff --git a/js_example/index.html b/js_example/index.html index fbf20ac..548daac 100644 --- a/js_example/index.html +++ b/js_example/index.html @@ -2,7 +2,7 @@ - Creator Compiler JS Example + CREATOR Assembler JS Example diff --git a/js_example/node.js b/js_example/node.js index 8aac0f1..e8abf11 100644 --- a/js_example/node.js +++ b/js_example/node.js @@ -1,14 +1,14 @@ const fs = require("fs"); -const wasm = require('../pkg/nodejs/creator_compiler.js'); -/**@type {import("./compiler.mjs")} compiler */ 
-import("./compiler.mjs").then(compiler => { +const wasm = require('../pkg/nodejs/creator_assembler.js'); +/**@type {import("./assembler.mjs")} assembler */ +import("./assembler.mjs").then(assembler => { const json_arch = fs.readFileSync(__dirname + "/../tests/architecture.json", "utf8") - const arch = compiler.load(wasm, json_arch) + const arch = assembler.load(wasm, json_arch) const src = fs.readFileSync(process.argv[2], "utf8") try { - const compiled = compiler.compile(wasm, arch, src); + const compiled = assembler.compile(wasm, arch, src); console.log(compiled.msg); console.log(compiled.instructions); console.log(compiled.data); diff --git a/js_example/web.js b/js_example/web.js index 73b90f0..939d3a5 100644 --- a/js_example/web.js +++ b/js_example/web.js @@ -1,11 +1,11 @@ -import init, * as wasm from "../pkg/web/creator_compiler.js"; -import * as compiler from "./compiler.mjs"; +import init, * as wasm from "../pkg/web/creator_assembler.js"; +import * as assembler from "./assembler.mjs"; await init({}) const json_arch = await (await fetch("../tests/architecture.json")).text() -const arch = compiler.load(wasm, json_arch) +const arch = assembler.load(wasm, json_arch) console.log(arch.toString()) window["arch"] = arch @@ -14,7 +14,7 @@ const out = document.getElementById("result"); document.getElementById("compile_btn").onclick = function () { try { - const compiled = compiler.compile(wasm, arch, src.value); + const compiled = assembler.compile(wasm, arch, src.value); window["instructions"] = compiled.instructions window["data"] = compiled.data window["instructions"] = compiled.label_table diff --git a/src/architecture.rs b/src/architecture.rs index 844fe6d..2dfeeff 100644 --- a/src/architecture.rs +++ b/src/architecture.rs @@ -31,7 +31,7 @@ use std::collections::HashMap; mod utils; pub use utils::NonEmptyRangeInclusive; -pub use utils::{BaseN, Integer, Pair, RangeFrom}; +pub use utils::{BaseN, Integer, RangeFrom}; mod json; @@ -43,7 +43,7 @@ pub struct 
Architecture<'a> { /// memory alignment, main function, passing convention, and sensitive register /// name #[serde(borrow)] - pub arch_conf: Config<'a>, + pub config: Config<'a>, /// Components (register files) of the architecture. It's assumed that the first register of /// the first file will contain the program counter pub components: Vec>, @@ -60,6 +60,9 @@ pub struct Architecture<'a> { /// Interrupt configuration #[serde(default)] pub interrupts: Option, + /// Timer configuration + #[serde(default)] + pub timer: Option, /// Definitions of possible enumerated instruction fields #[serde(default)] pub enums: HashMap<&'a str, EnumDefinition<'a>>, @@ -88,8 +91,7 @@ pub struct Modifier { pub type EnumDefinition<'a> = HashMap<&'a str, Integer>; /// Architecture metadata attributes -#[derive(Deserialize, Debug, PartialEq, Eq, Clone, Copy)] -#[serde(try_from = "[json::Config<'a>; 9]")] +#[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone, Copy)] pub struct Config<'a> { /// Name of the architecture pub name: &'a str, @@ -98,7 +100,7 @@ pub struct Config<'a> { /// Description of the architecture pub description: &'a str, /// Storage format of the architecture (big/little endian) - pub data_format: DataFormat, + pub endianness: Endianness, /// Whether to enable memory alignment pub memory_alignment: bool, /// Name of the `main` function of the program @@ -110,12 +112,11 @@ pub struct Config<'a> { /// String to use as line comment prefix pub comment_prefix: &'a str, } -utils::schema_from!(Config<'a>, [json::Config<'a>; 9]); /// Endianness of data in the architecture #[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone, Copy)] #[serde(rename_all = "snake_case")] -pub enum DataFormat { +pub enum Endianness { BigEndian, LittleEndian, } @@ -129,8 +130,6 @@ pub struct Component<'a> { r#type: ComponentType, /// Whether the registers have double the word size double_precision: bool, - /// If the registers have double the word size, how this size is achieved - 
double_precision_type: Option, /// Registers in this file pub elements: Vec>, } @@ -160,39 +159,23 @@ pub enum RegisterType { Float(FloatType), } -/// Type of registers bigger than a single word -#[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone, Copy)] -#[serde(rename_all = "snake_case")] -pub enum PrecisionType { - /// Register has a bigger size - Extended, - /// Register is made up of 2 word size registers - Linked, -} - /// Register specification #[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone)] pub struct Register<'a> { /// List of aliases #[serde(borrow)] pub name: Vec<&'a str>, + /// Encoding of the register in an instruction + pub encoding: Integer, /// Size - #[serde(deserialize_with = "utils::from_str")] - #[schemars(with = "utils::StringOrT")] pub nbits: Integer, /// Current value of the register - #[serde(deserialize_with = "utils::from_str")] - #[schemars(with = "utils::StringOrT")] pub value: Number, /// Default value of the register - #[serde(deserialize_with = "utils::optional_from_str")] #[serde(default)] - #[schemars(with = "Option>")] pub default_value: Option, /// Properties of this register pub properties: Vec, - /// Smaller registers that make up this register when the double precision mode is `Linked` - pub simple_reg: Option<[&'a str; 2]>, } /// Properties of a register @@ -243,10 +226,6 @@ pub struct Instruction<'a> { // Can't be a reference because there might be escape sequences, which require // modifying the data on deserialization pub definition: String, - /// Determines whether the field `i` is separated in the resulting binary instruction - pub separated: Option>, - /// Help information of the instruction - pub help: &'a str, /// Properties of the instruction pub properties: Option>, } @@ -285,8 +264,6 @@ pub struct InstructionSyntax<'a, BitRange> { pub parser: crate::parser::Instruction, /// Translated instruction's syntax pub output_syntax: &'a str, - /// User representation of the instruction's syntax - 
pub user_syntax: String, /// Parameters of the instruction pub fields: Vec>, } @@ -296,8 +273,8 @@ utils::schema_from!(InstructionSyntax<'a, T>, json::InstructionSyntax); #[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone, Copy)] #[serde(rename_all = "snake_case")] pub enum InstructionProperties { - ExitSubrutine, - EnterSubrutine, + ExitSubroutine, + EnterSubroutine, Privileged, } @@ -335,14 +312,13 @@ pub enum FieldType<'a> { /// Extended operation code Cop { /// Fixed value of this field in the binary instruction (specified as a binary string) - #[serde(rename = "valueField")] value: BaseN<2>, }, /// Immediate signed integer - #[serde(rename = "inm-signed")] + #[serde(rename = "imm-signed")] ImmSigned, /// Immediate unsigned integer - #[serde(rename = "inm-unsigned")] + #[serde(rename = "imm-unsigned")] ImmUnsigned, /// Offset from the next instruction's address in bytes #[serde(rename = "offset_bytes")] @@ -383,8 +359,6 @@ pub struct Pseudoinstruction<'a> { // Can't be a reference because there might be escape sequences, which require // modifying the data on deserialization pub definition: String, - /// Help information of the instruction - pub help: &'a str, /// Properties of the instruction pub properties: Option>, } @@ -499,13 +473,12 @@ pub enum AlignmentType { } /// Memory layout of the architecture -#[derive(Deserialize, Debug, PartialEq, Eq, Clone)] -#[serde(try_from = "Vec>>")] +#[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone)] pub struct MemoryLayout { /// Addresses reserved for the kernel text segment - kernel_text: Option>, + ktext: Option>, /// Addresses reserved for the kernel data segment - kernel_data: Option>, + kdata: Option>, /// Addresses reserved for the text segment text: NonEmptyRangeInclusive, /// Addresses reserved for the data segment @@ -513,28 +486,64 @@ pub struct MemoryLayout { /// Addresses reserved for the stack segment stack: NonEmptyRangeInclusive, } -utils::schema_from!(MemoryLayout, Vec>>); + 
+#[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone)] +pub struct InterruptHandlers { + /// JS Handler for CREATOR interrupt handler's syscall interrupt + pub creator_syscall: Option, + /// JS Handler for the custom interrupt handler + pub custom: Option, +} #[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone)] pub struct Interrupts { - /// Controls whether interrupts are enabled by default (`true`) or not (`false`) - pub enabled: bool, - /// JS code to be executed in order to check whether an interrupt happened. - /// It must return an `InterruptType` (if an interrupt happened) or `null` (if it didn't) - pub interrupt_check: String, - /// JS code to be executed in order to check whether interrupts are enabled - pub enable_check: String, - /// JS code to be executed in order to enable interrupts - pub interrupt_enable: String, - /// JS code to be executed in order to disable interrupts - pub interrupt_disable: String, - /// JS code to be executed in order to obtain the interrupt handler address - pub get_handler_addr: String, - /// JS code to be executed in order to clear an interrupt - pub clear_interrupt: String, - /// JS arrow (lambda) function to be executed in order to set an interrupt given an interrupt - /// type - pub set_interrupt_cause: String, + /// Interrupt handler configuration + pub handlers: InterruptHandlers, + /// JS code to be executed in order to check what type of interrupt occurred. + /// It must return an `InterruptType` (if an interrupt happened) or `null` + /// (if it didn't) + pub check: String, + /// JS code to be executed in order to enable the specified interrupt + /// `type`. Defaults to `global_enable` + pub enable: Option, + /// JS code to be executed in order to disable the specified interrupt + /// `type`. 
Defaults to `global_disable` + pub disable: Option, + /// JS code to be executed in order to globally enable interrupts + pub global_enable: String, + /// JS code to be executed in order to globally disable interrupts + pub global_disable: String, + /// JS code to be executed in order to clear an interrupt of the specified + /// `type`. Defaults to `global_clear` + pub clear: Option, + /// JS code to be executed in order to clear all interrupts + pub global_clear: String, + /// JS code to be executed in order to set an interrupt given an interrupt + /// `type` + pub create: String, + /// JS code to check whether the specified interrupt `type` is enabled. Must + /// return a boolean. Defaults to `is_global_enabled` + pub is_enabled: Option, + /// JS code to check whether interrupts are globally enabled. Must return + /// a boolean + pub is_global_enabled: String, +} + +#[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone)] +pub struct Timer { + /// Number of clock cycles that correspond to one timer tick + pub tick_cycles: usize, + /// JS code to be executed each tick in order to advance the tick + pub advance: String, + /// JS code to be executed each tick in order to check the timer and act (e.g.
launch an + /// interrupt) + pub handler: String, + /// JS code to be executed in order to check whether the timer is enabled + pub is_enabled: String, + /// JS code to be executed in order to enable timer + pub enable: String, + /// JS code to be executed in order to disable timer + pub disable: String, } impl Architecture<'_> { @@ -559,7 +568,7 @@ impl Architecture<'_> { /// doesn't conform to the specification pub fn from_json(src: &str) -> serde_json::Result> { let arch = serde_json::from_str::(src)?; - let word_size = arch.arch_conf.word_size; + let word_size = arch.config.word_size; for instruction in &arch.instructions { let size = instruction.nwords.saturating_mul(word_size); for field in &instruction.syntax.fields { @@ -591,19 +600,19 @@ impl Architecture<'_> { /// Gets the word size of the architecture #[must_use] pub const fn word_size(&self) -> usize { - self.arch_conf.word_size + self.config.word_size } /// Gets the name of the label used as the entry point of the code #[must_use] pub const fn main_label(&self) -> &str { - self.arch_conf.main_function + self.config.main_function } /// Gets the string to use as the line comment prefix #[must_use] pub const fn comment_prefix(&self) -> &str { - self.arch_conf.comment_prefix + self.config.comment_prefix } /// Gets the code section's start/end addresses @@ -615,7 +624,7 @@ impl Architecture<'_> { /// Gets the kernel's code section's start/end addresses #[must_use] pub const fn kernel_code_section(&self) -> Option<&NonEmptyRangeInclusive> { - self.memory_layout.kernel_text.as_ref() + self.memory_layout.ktext.as_ref() } /// Gets the data section's start/end addresses @@ -627,7 +636,7 @@ impl Architecture<'_> { /// Gets the kernel's data section's start/end addresses #[must_use] pub const fn kernel_data_section(&self) -> Option<&NonEmptyRangeInclusive> { - self.memory_layout.kernel_data.as_ref() + self.memory_layout.kdata.as_ref() } /// Gets the instructions with the given name @@ -665,14 +674,11 @@ impl 
Architecture<'_> { /// * `type`: type of the file wanted pub fn find_reg_files(&self, r#type: RegisterType) -> impl Iterator> { let eq = move |file: &&Component| match r#type { - RegisterType::Int => matches!(file.r#type, ComponentType::Int), - RegisterType::Ctrl => matches!(file.r#type, ComponentType::Ctrl), - RegisterType::Float(FloatType::Float) => matches!( - (file.r#type, file.double_precision_type), - (ComponentType::Float, None | Some(PrecisionType::Extended)) - ), - RegisterType::Float(FloatType::Double) => { - matches!(file.r#type, ComponentType::Float) && file.double_precision_type.is_some() + RegisterType::Int => file.r#type == ComponentType::Int, + RegisterType::Ctrl => file.r#type == ComponentType::Ctrl, + RegisterType::Float(x) => { + file.r#type == ComponentType::Float + && (x == FloatType::Double) == file.double_precision } }; self.components.iter().filter(eq) @@ -688,8 +694,8 @@ impl Component<'_> { /// * `name`: name of the register to search for /// * `case`: whether the find should be case sensitive (`true`) or not (`false`) #[must_use] - pub fn find_register(&self, name: &str, case: bool) -> Option<(usize, &Register<'_>, &str)> { - self.elements.iter().enumerate().find_map(|(i, reg)| { + pub fn find_register(&self, name: &str, case: bool) -> Option<(&Register<'_>, &str)> { + self.elements.iter().find_map(|reg| { let name = reg.name.iter().find(|&&n| { if case { n == name @@ -697,7 +703,7 @@ impl Component<'_> { n.eq_ignore_ascii_case(name) } }); - name.map(|&n| (i, reg, n)) + name.map(|&n| (reg, n)) }) } } diff --git a/src/architecture/json.rs b/src/architecture/json.rs index fb4afc5..6b9dc08 100644 --- a/src/architecture/json.rs +++ b/src/architecture/json.rs @@ -21,13 +21,12 @@ //! Module containing conversion methods between the format used by the architecture JSON //! 
specification and our internal representation -use num_bigint::BigUint; use schemars::JsonSchema; use serde::Deserialize; -use super::{utils, DataFormat, DirectiveAction}; +use super::{utils, DirectiveAction}; use super::{AlignmentType, FloatType, IntegerType, StringType}; -use utils::{BaseN, Bool, NonEmptyRangeInclusive, Pair, StringOrT}; +use utils::NonEmptyRangeInclusive; /// Directive specification #[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone, Copy)] @@ -37,9 +36,7 @@ pub struct Directive<'a> { /// Action of the directive pub action: DirectiveAction, /// Size in bytes of values associated with this directive - #[serde(deserialize_with = "utils::optional_from_str")] #[serde(default)] - #[schemars(with = "Option>")] pub size: Option, } @@ -143,17 +140,9 @@ impl TryFrom for super::BitRange { /// Instruction syntax specification #[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone)] pub struct InstructionSyntax<'a, BitRange> { - /// Order of the fields/literal characters in the instruction text. `[fF]\d+` is interpreted as - /// the field with index i of the instruction. Other characters are interpreted literally - /// Ex: `F0 F3 F1 (F2)` + /// Syntax specification of the instruction. `[fF]\d+` is interpreted as the field with index + /// `i` of the instruction. Other characters are interpreted literally. Ex: `F0 F3 F1 (F2)` pub signature_definition: &'a str, - /// `signature_definition` in which `[fF]\d+` has been replaced with the type of each field in - /// the instruction. Valid values are those in `InstructionFieldType`, except `Co` and `Cop`. - /// The instruction opcode is replaced with its name. 
Spaces must also be replaced with `,` - pub signature: &'a str, - /// Same as `signature`, but replacing `[fF]\d+` with the field names - #[serde(rename = "signatureRaw")] - pub signature_raw: &'a str, /// Parameters of the instruction pub fields: Vec>, } @@ -162,217 +151,11 @@ impl<'a, T> TryFrom> for super::InstructionSyntax<'a, T type Error = &'static str; fn try_from(value: InstructionSyntax<'a, T>) -> Result { - let format = |fmt: &str| { - let fmt = fmt.replace(" (", "("); - fmt.split_once(' ') - .map(|(opcode, syntax)| format!("{opcode} {}", syntax.replace(' ', ","))) - .unwrap_or(fmt) - }; - let parser = - crate::parser::Instruction::build(&format(value.signature_definition), &value.fields)?; + let parser = crate::parser::Instruction::build(value.signature_definition, &value.fields)?; Ok(Self { parser, output_syntax: value.signature_definition, - user_syntax: format(value.signature_raw), fields: value.fields, }) } } - -/// Architecture metadata attributes -#[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone, Copy)] -#[serde(tag = "name", content = "value")] -pub enum Config<'a> { - /// Name of the architecture - Name(&'a str), - /// Word size - Bits( - #[serde(deserialize_with = "utils::from_str")] - #[schemars(with = "utils::StringOrT")] - usize, - ), - /// Description of the architecture - Description(&'a str), - /// Storage format of the architecture (big/little endian) - #[serde(rename = "Data Format")] - DataFormat(DataFormat), - /// Whether to enable memory alignment - #[serde(rename = "Memory Alignment")] - MemoryAlignment(Bool), - /// Name of the `main` function of the program - #[serde(rename = "Main Function")] - MainFunction(&'a str), - /// Whether to enable function parameter passing convention checks - #[serde(rename = "Passing Convention")] - PassingConvention(Bool), - /// Whether the register names should be case sensitive (`true`) or not (`false`) - #[serde(rename = "Sensitive Register Name")] - SensitiveRegisterName(Bool), - /// 
String to use as line comment prefix - CommentPrefix(&'a str), -} - -/// Macro to generate an error message for an incorrect key value -macro_rules! key_error { - ($i:expr, $name:ident) => { - return Err(concat!( - "unexpected key at index ", - stringify!($i), - ", expected key `", - stringify!($name), - "`" - )) - }; -} - -impl<'a> TryFrom<[Config<'a>; 9]> for super::Config<'a> { - type Error = &'static str; - fn try_from(value: [Config<'a>; 9]) -> Result { - /// Macro to unwrap the value of a field, checking that its key is correct - macro_rules! unwrap_field { - ($i:expr, $name:ident) => { - match value[$i] { - Config::$name(x) => x.into(), - _ => key_error!($i, $name), - } - }; - } - Ok(Self { - name: unwrap_field!(0, Name), - word_size: unwrap_field!(1, Bits), - description: unwrap_field!(2, Description), - data_format: unwrap_field!(3, DataFormat), - memory_alignment: unwrap_field!(4, MemoryAlignment), - main_function: unwrap_field!(5, MainFunction), - passing_convention: unwrap_field!(6, PassingConvention), - sensitive_register_name: unwrap_field!(7, SensitiveRegisterName), - comment_prefix: unwrap_field!(8, CommentPrefix), - }) - } -} - -/// Memory layout attribute keys -#[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone, Copy)] -pub enum MemoryLayoutKeys { - #[serde(rename = "ktext start")] - KtextStart, - #[serde(rename = "ktext end")] - KtextEnd, - #[serde(rename = "kdata start")] - KdataStart, - #[serde(rename = "kdata end")] - KdataEnd, - #[serde(rename = "text start")] - TextStart, - #[serde(rename = "text end")] - TextEnd, - #[serde(rename = "data start")] - DataStart, - #[serde(rename = "data end")] - DataEnd, - #[serde(rename = "stack start")] - StackStart, - #[serde(rename = "stack end")] - StackEnd, -} - -impl TryFrom>>> for super::MemoryLayout { - type Error = &'static str; - fn try_from(mut value: Vec>>) -> Result { - /// Macro to unwrap the value of a field, checking that its key is correct - macro_rules! 
unwrap_field { - ($i:expr, $name:ident) => { - match value[$i].name { - MemoryLayoutKeys::$name => std::mem::take(&mut value[$i].value.0), - _ => key_error!($i, $name), - } - }; - } - /// Macro to check that two given sections don't overlap - macro_rules! check_overlap { - ($a:ident, $b:ident) => { - if ($a.contains($b.start()) || $b.contains($a.start())) { - return Err(concat!( - "section `", - stringify!($a), - "` overlaps with section `", - stringify!($b), - "`" - )); - } - }; - } - - // unwrap values - - // check for kernel segment - let (kernel_text, kernel_data, offset) = match value.len() { - 10 => { - let ktext = (unwrap_field!(0, KtextStart), unwrap_field!(1, KtextEnd)); - let kdata = (unwrap_field!(2, KdataStart), unwrap_field!(3, KdataEnd)); - - let ktext = NonEmptyRangeInclusive::::build(ktext.0, ktext.1) - .ok_or("section `ktext` is empty")?; - let kdata = NonEmptyRangeInclusive::::build(kdata.0, kdata.1) - .ok_or("section `kdata` is empty")?; - - (Some(ktext), Some(kdata), 4) - } - 6 => (None, None, 0), - - _ => { - return Err("Incorrect number of key-value pairs for memory_layout"); - } - }; - - let text = ( - unwrap_field!(offset, TextStart), - unwrap_field!(offset + 1, TextEnd), - ); - let data = ( - unwrap_field!(offset + 2, DataStart), - unwrap_field!(offset + 3, DataEnd), - ); - let stack = ( - unwrap_field!(offset + 4, StackStart), - unwrap_field!(offset + 5, StackEnd), - ); - - let text = NonEmptyRangeInclusive::::build(text.0, text.1) - .ok_or("section `text` is empty")?; - let data = NonEmptyRangeInclusive::::build(data.0, data.1) - .ok_or("section `data` is empty")?; - let stack = NonEmptyRangeInclusive::::build(stack.0, stack.1) - .ok_or("section `stack` is empty")?; - - // check overlap - - if let Some(ktext) = &kernel_text { - if let Some(kdata) = &kernel_data { - check_overlap!(ktext, kdata); - } - - check_overlap!(ktext, text); - check_overlap!(ktext, data); - check_overlap!(ktext, stack); - } - - if let Some(kdata) = &kernel_data { - 
check_overlap!(kdata, text); - check_overlap!(kdata, data); - check_overlap!(kdata, stack); - } - - check_overlap!(text, data); - check_overlap!(text, stack); - check_overlap!(data, stack); - - Ok(Self { - kernel_text, - kernel_data, - text, - data, - stack, - }) - } -} diff --git a/src/architecture/utils.rs b/src/architecture/utils.rs index 7d102f5..58c4119 100644 --- a/src/architecture/utils.rs +++ b/src/architecture/utils.rs @@ -25,7 +25,7 @@ use num_traits::{Num as _, One as _}; use schemars::JsonSchema; use serde::{de::Error, Deserialize, Deserializer}; -use core::{fmt::Display, str::FromStr}; +use core::{ops::RangeInclusive, str::FromStr}; /// Thin wrapper for big integers that can be deserialized from JSON, either from a JSON integer or /// a string representing an integer @@ -62,21 +62,6 @@ impl FromStr for Integer { #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord, JsonSchema)] pub struct BaseN(#[schemars(with = "String")] pub BigUint); -/// A key-value pair -#[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone, Copy)] -pub struct Pair { - pub name: Keys, - pub value: Value, -} - -/// A value optionally stored as a string -#[derive(Deserialize, JsonSchema)] -#[serde(untagged)] -pub enum StringOrT<'a, T> { - String(&'a str), - T(T), -} - impl<'de> Deserialize<'de> for Integer { fn deserialize>(deserializer: D) -> Result { let s = serde_json::Number::deserialize(deserializer)?; @@ -93,52 +78,6 @@ impl<'de, const N: u8> Deserialize<'de> for BaseN { } } -/// Deserialization function for a value serialized either as a string or as the value itself -pub fn from_str<'de, T, D>(deserializer: D) -> Result -where - D: Deserializer<'de>, - T: FromStr + Deserialize<'de>, - ::Err: Display, -{ - match Deserialize::deserialize(deserializer)? 
{ - StringOrT::T(i) => Ok(i), - StringOrT::String(s) => s.parse::().map_err(Error::custom), - } -} - -/// Deserialization function for an optional value serialized either as a string or as the value -/// itself -pub fn optional_from_str<'de, T, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, - T: FromStr + Deserialize<'de>, - ::Err: Display, -{ - match Deserialize::deserialize(deserializer)? { - None => Ok(None), - Some(StringOrT::T(i)) => Ok(Some(i)), - Some(StringOrT::String(s)) => s.parse::().map(Some).map_err(serde::de::Error::custom), - } -} - -/// A boolean value serialized as a string of a 0/1 -#[derive(Deserialize, JsonSchema, Debug, PartialEq, Eq, Clone, Copy)] -pub enum Bool { - #[serde(rename = "1")] - True, - #[serde(rename = "0")] - False, -} - -impl From for bool { - fn from(value: Bool) -> Self { - match value { - Bool::True => true, - Bool::False => false, - } - } -} - /// Inclusive range guaranteed to be non-empty #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct NonEmptyRangeInclusive { @@ -199,6 +138,20 @@ macro_rules! impl_NonEmptyRangeInclusive { impl_NonEmptyRangeInclusive!(BigUint, usize); +impl<'de> Deserialize<'de> for NonEmptyRangeInclusive { + fn deserialize>(deserializer: D) -> Result { + let range: RangeInclusive = Deserialize::deserialize(deserializer)?; + let (start, end) = range.into_inner(); + Self::build(start.0, end.0) + .ok_or("section can't be empty") + .map_err(Error::custom) + } +} + +impl JsonSchema for NonEmptyRangeInclusive { + schema_from!(impl: RangeInclusive); +} + /// Exclusive non-empty range with a possibly unbound end #[derive(Deserialize, Debug, PartialEq, Eq, Clone, Copy)] #[serde(try_from = "(u64, Option)")] @@ -225,23 +178,26 @@ impl TryFrom<(u64, Option)> for RangeFrom { /// Derive implementation of [`JsonSchema`] from the implementation of a different type macro_rules! schema_from { - ($dst:ident$(<$($lt:lifetime)? $($(,)? $t:ident)?>)?, $src:ty) => { - impl $(<$($lt)? 
$(, $t: JsonSchema)?>)? JsonSchema for $dst$(<$($lt)? $(, $t)?>)? { - fn schema_name() -> String { - <$src as JsonSchema>::schema_name() - } + (impl: $src:ty) => { + fn schema_name() -> String { + <$src as JsonSchema>::schema_name() + } - fn schema_id() -> std::borrow::Cow<'static, str> { - <$src as JsonSchema>::schema_id() - } + fn schema_id() -> std::borrow::Cow<'static, str> { + <$src as JsonSchema>::schema_id() + } - fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema { - <$src as JsonSchema>::json_schema(gen) - } + fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema { + <$src as JsonSchema>::json_schema(gen) + } - fn is_referenceable() -> bool { - <$src as JsonSchema>::is_referenceable() - } + fn is_referenceable() -> bool { + <$src as JsonSchema>::is_referenceable() + } + }; + ($dst:ident$(<$($lt:lifetime)? $($(,)? $t:ident)?>)?, $src:ty) => { + impl $(<$($lt)? $(, $t: JsonSchema)?>)? JsonSchema for $dst$(<$($lt)? $(, $t)?>)? { + $crate::architecture::utils::schema_from!(impl: $src); } }; } diff --git a/src/compiler.rs b/src/compiler.rs index 700ae4a..6f19655 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -272,14 +272,14 @@ fn parse_instruction<'a>( // Otherwise, store it in case this is the only matching definition possible_def = Some((inst, parsed_args)); } - Err(e) => errs.push((inst.syntax.user_syntax.to_string(), e)), + Err(e) => errs.push((inst.syntax.parser.syntax().to_string(), e)), } } for inst in arch.find_pseudoinstructions(name.0) { match inst.syntax.parser.parse(args) { // If parsing is successful, assume this definition is the correct one and return it Ok(parsed_args) => return Ok((InstructionDefinition::Pseudo(inst), parsed_args)), - Err(e) => errs.push((inst.syntax.user_syntax.to_string(), e)), + Err(e) => errs.push((inst.syntax.parser.syntax().to_string(), e)), } } // None of the definitions matched perfectly. 
If there is a matching definition that failed due @@ -1089,9 +1089,9 @@ fn evaluate_instruction_field( files .peek() .ok_or_else(|| ErrorKind::UnknownRegisterFile(file_type).add_span(arg.value.1))?; - let case = ctx.arch.arch_conf.sensitive_register_name; + let case = ctx.arch.config.sensitive_register_name; // Find the register with the given name - let (i, _, name) = files + let (reg, name) = files .find_map(|file| file.find_register(&name, case)) .ok_or_else(|| { ErrorKind::UnknownRegister { @@ -1100,7 +1100,7 @@ fn evaluate_instruction_field( } .add_span(arg.value.1) })?; - (i.into(), name.to_string()) + (reg.encoding.0.clone().into(), name.to_string()) } // Enumerated fields FieldType::Enum { enum_name } => { @@ -1470,20 +1470,21 @@ mod test { #[test] fn instruction_fields_regs() { // Simple - let x = compile(".text\nmain: reg ctrl1, x2, ft1, ft2").unwrap(); + let x = compile(".text\nmain: reg ctrl1, x2, fs1, ft2").unwrap(); let binary = "01001000000000000000000000010010"; + let result = "reg ctrl1, x2, fs1, ft2"; let tbl = label_table([("main", 0, 6..11)]); assert_eq!(x.label_table, tbl); assert_eq!( x.instructions, - vec![inst(0, &["main"], "reg ctrl1 x2 ft1 ft2", binary, 12..35)] + vec![inst(0, &["main"], result, binary, 12..35)] ); assert_eq!(x.data_memory, vec![]); assert_eq!(x.global_symbols, HashSet::new()); // Aliases - let x = compile(".text\nmain: reg ctrl1, two, F1, Field2").unwrap(); + let x = compile(".text\nmain: reg ctrl1, two, f1, Field2").unwrap(); assert_eq!(x.label_table, tbl); - let instruction = "reg ctrl1 two F1 Field2"; + let instruction = "reg ctrl1, two, f1, Field2"; assert_eq!( x.instructions, vec![inst(0, &["main"], instruction, binary, 12..38)] @@ -1491,20 +1492,11 @@ mod test { assert_eq!(x.data_memory, vec![]); assert_eq!(x.global_symbols, HashSet::new()); // Number aliases - let x = compile(".text\nmain: reg ctrl1, 2, ft1, ft2").unwrap(); + let x = compile(".text\nmain: reg ctrl1, 2, fs1, ft2").unwrap(); assert_eq!(x.label_table, 
tbl); assert_eq!( x.instructions, - vec![inst(0, &["main"], "reg ctrl1 2 ft1 ft2", binary, 12..34)] - ); - assert_eq!(x.data_memory, vec![]); - assert_eq!(x.global_symbols, HashSet::new()); - // Linked floating point registers - let x = compile(".text\nmain: reg ctrl1, x2, fs1, FD2").unwrap(); - assert_eq!(x.label_table, tbl); - assert_eq!( - x.instructions, - vec![inst(0, &["main"], "reg ctrl1 x2 fs1 FD2", binary, 12..35)] + vec![inst(0, &["main"], "reg ctrl1, 2, fs1, ft2", binary, 12..34)] ); assert_eq!(x.data_memory, vec![]); assert_eq!(x.global_symbols, HashSet::new()); @@ -1531,7 +1523,7 @@ mod test { assert_eq!(x.label_table, label_table([("main", 0, 6..11)])); assert_eq!( x.instructions, - vec![inst(0, &["main"], "imm -7 255 11", binary, 12..27)] + vec![inst(0, &["main"], "imm -7, 255, 11", binary, 12..27)] ); assert_eq!(x.data_memory, vec![]); assert_eq!(x.global_symbols, HashSet::new()); @@ -1554,7 +1546,7 @@ mod test { x.instructions, vec![ main_nop(12..15), - inst(4, &["a"], "imm 4 16 8", binary, 19..30), + inst(4, &["a"], "imm 4, 16, 8", binary, 19..30), inst(8, &["b"], "nop", NOP_BINARY, 34..37), ] ); @@ -1572,7 +1564,7 @@ mod test { assert_eq!(x.label_table, label_table([("main", 0, 6..11)])); assert_eq!( x.instructions, - vec![inst(0, &["main"], "off 7 -8", binary, 12..21)] + vec![inst(0, &["main"], "off 7, -8", binary, 12..21)] ); assert_eq!(x.data_memory, vec![]); assert_eq!(x.global_symbols, HashSet::new()); @@ -1585,7 +1577,7 @@ mod test { assert_eq!(x.label_table, label_table([("main", 0, 6..11)])); assert_eq!( x.instructions, - vec![main_nop(12..15), inst(4, &[], "off -4 -1", binary, 16..30),] + vec![main_nop(12..15), inst(4, &[], "off -4, -1", binary, 16..30),] ); assert_eq!(x.data_memory, vec![]); assert_eq!(x.global_symbols, HashSet::new()); @@ -1600,7 +1592,7 @@ mod test { x.instructions, vec![ inst(0, &["a"], "nop", NOP_BINARY, 9..12), - inst(4, &[], "off 4 1", binary, 13..27), + inst(4, &[], "off 4, 1", binary, 13..27), inst(8, &["main"], 
"nop", NOP_BINARY, 34..37), ] ); @@ -1615,7 +1607,7 @@ mod test { assert_eq!(x.label_table, label_table([("main", 0, 6..11)])); assert_eq!( x.instructions, - vec![inst(0, &["main"], "off 6 7", binary, 12..20)] + vec![inst(0, &["main"], "off 6, 7", binary, 12..20)] ); assert_eq!(x.data_memory, vec![]); assert_eq!(x.global_symbols, HashSet::new()); @@ -1631,7 +1623,7 @@ mod test { ); assert_eq!( x.instructions, - vec![inst(0, &["main"], "off 1 4", binary, 12..20)] + vec![inst(0, &["main"], "off 1, 4", binary, 12..20)] ); assert_eq!( x.data_memory, @@ -1650,7 +1642,7 @@ mod test { assert_eq!(x.label_table, label_table([("main", 0, 6..11)])); assert_eq!( x.instructions, - vec![inst(0, &["main"], "enum a b value last", binary, 12..34)] + vec![inst(0, &["main"], "enum a, b, value, last", binary, 12..34)] ); assert_eq!(x.data_memory, vec![]); assert_eq!(x.global_symbols, HashSet::new()); @@ -1663,7 +1655,7 @@ mod test { assert_eq!(x.label_table, label_table([("main", 0, 6..11)])); assert_eq!( x.instructions, - vec![inst(0, &["main"], "pad 12 4", binary, 12..21)] + vec![inst(0, &["main"], "pad 12, 4", binary, 12..21)] ); assert_eq!(x.data_memory, vec![]); assert_eq!(x.global_symbols, HashSet::new()); @@ -1673,7 +1665,7 @@ mod test { assert_eq!(x.label_table, label_table([("main", 0, 6..11)])); assert_eq!( x.instructions, - vec![inst(0, &["main"], "pad -16 -4", binary, 12..23)] + vec![inst(0, &["main"], "pad -16, -4", binary, 12..23)] ); assert_eq!(x.data_memory, vec![]); assert_eq!(x.global_symbols, HashSet::new()); @@ -1686,7 +1678,7 @@ mod test { assert_eq!(x.label_table, label_table([("main", 0, 6..11)])); assert_eq!( x.instructions, - vec![inst(0, &["main"], "lit F1a aF1 3", binary, 12..27)] + vec![inst(0, &["main"], "lit F1a, aF1, 3", binary, 12..27)] ); assert_eq!(x.data_memory, vec![]); assert_eq!(x.global_symbols, HashSet::new()); @@ -2100,9 +2092,10 @@ mod test { let x = compile(".text\nmain: nop\nimm ., 0, 0\n.data\n.word .").unwrap(); assert_eq!(x.label_table, 
label_table([("main", 0, 6..11)])); let binary = "00010000000000000000000000000000"; + let result = "imm 4, 0, 0"; assert_eq!( x.instructions, - vec![main_nop(12..15), inst(4, &[], "imm 4 0 0", binary, 16..27)] + vec![main_nop(12..15), inst(4, &[], result, binary, 16..27)] ); assert_eq!( x.data_memory, @@ -2254,15 +2247,15 @@ mod test { .add_span((24..26).span())), ); assert_eq!( - compile(".text\nmain: reg PC, x0, FD1, ft2"), + compile(".text\nmain: reg PC, x0, F1, ft2"), Err(ErrorKind::UnknownRegister { - name: "FD1".into(), + name: "F1".into(), file: RegisterType::Float(FloatType::Float), } - .add_span((24..27).span())), + .add_span((24..26).span())), ); assert_eq!( - compile(".text\nmain: reg PC, x0, ft1, fs2"), + compile(".text\nmain: reg PC, x0, fs1, fs2"), Err(ErrorKind::UnknownRegister { name: "fs2".into(), file: RegisterType::Float(FloatType::Double), diff --git a/src/compiler/pseudoinstruction.rs b/src/compiler/pseudoinstruction.rs index 3eaa9d5..61022cf 100644 --- a/src/compiler/pseudoinstruction.rs +++ b/src/compiler/pseudoinstruction.rs @@ -33,13 +33,13 @@ use regex::{Captures, Regex}; use std::fmt::Write as _; use std::sync::LazyLock; -use crate::architecture::{FloatType, Pseudoinstruction, RegisterType}; +use crate::architecture::Pseudoinstruction; use crate::number::Number; use crate::parser::{ParseError, Token}; use crate::span::Range; use super::{ArgumentType, Context, ErrorData, ErrorKind, InstructionDefinition}; -use super::{Expr, ParsedArgs}; +use super::{Expr, ParsedArgs, ParsedArgument}; use super::{Span, Spanned, SpannedErr}; /// Pseudoinstruction evaluation error kind @@ -227,9 +227,6 @@ pub fn expand<'arch>( args: &ParsedArgs, ) -> Result, ErrorData> { // Regex used - // Register name should be replaced with the register name of the i-th register forming this - // double precision register - static ALIAS_DOUBLE: LazyLock = crate::regex!(r"aliasDouble\(([^;]+);(\d+)\)"); // Gets the value of the i-th argument from bits j to k, evaluating 
the argument as the given // type static FIELD_VALUE: LazyLock = crate::regex!(r"Field\.(\d+)\.\((\d+),(\d+)\)\.(\w+)"); @@ -263,55 +260,25 @@ pub fn expand<'arch>( // Expansion let mut def = instruction.definition.replace('\n', ""); - let case = arch.arch_conf.sensitive_register_name; let mods = &arch.modifiers; - // Replace occurrences of `AliasDouble()` - while let Some(x) = ALIAS_DOUBLE.captures(&def) { - let (_, [name, i]) = x.extract(); - // Get the user's register name - let name = get_arg(name).ok_or_else(|| { + // Replace occurrences of `Field.number` + while let Some(x) = FIELD_VALUE.captures(&def) { + let (_, [arg, start_bit, end_bit, ty]) = x.extract(); + let arg_num = num(arg) - 1; + // Get the user's argument expression + let arg: &ParsedArgument = args.get(arg_num).ok_or_else(|| { Error { definition: def.clone(), span: capture_span(&x, 1), - kind: Kind::UnknownFieldName(name.to_owned()), + kind: Kind::UnknownFieldNumber { + idx: arg_num + 1, + size: args.len(), + }, } .compile_error(instruction, span) })?; - let name = ®_name(&name.value)?; - let i: usize = num(i); - // Find the register name and replace it - for file in arch.find_reg_files(RegisterType::Float(FloatType::Double)) { - if let Some((_, reg, _)) = file.find_register(name, case) { - let name = reg - .simple_reg - .and_then(|regs| regs.get(i).copied()) - .unwrap_or(name); - def.replace_range(capture_span(&x, 0), name); - break; - } - } - } - - // Replace occurrences of `Field.number` - while let Some(x) = FIELD_VALUE.captures(&def) { - let (_, [arg, start_bit, end_bit, ty]) = x.extract(); - let arg_num = num(arg) - 1; - // Get the user's argument expression - let (value, value_span) = &args - .get(arg_num) - .ok_or_else(|| { - Error { - definition: def.clone(), - span: capture_span(&x, 1), - kind: Kind::UnknownFieldNumber { - idx: arg_num + 1, - size: args.len(), - }, - } - .compile_error(instruction, span) - })? 
- .value; + let (value, value_span) = &arg.value; // Get the range of bits requested let start_bit = num(start_bit); let end_bit = num(end_bit); diff --git a/src/compiler/section.rs b/src/compiler/section.rs index eec5d63..ad0235a 100644 --- a/src/compiler/section.rs +++ b/src/compiler/section.rs @@ -46,8 +46,8 @@ impl Section { /// * `bounds`: start/end addresses of the section #[must_use] pub fn new(name: &'static str, bounds: Option<&NonEmptyRangeInclusive>) -> Self { - bounds.map_or( - Self { + bounds.map_or_else( + || Self { name, address: 1u8.into(), end: BigUint::ZERO, diff --git a/src/js.rs b/src/js.rs index bdc837c..9bb8e13 100644 --- a/src/js.rs +++ b/src/js.rs @@ -18,7 +18,7 @@ * along with CREATOR. If not, see . */ -//! Module containing the definition of wrappers for the compiler and generattion of `JS` bindings +//! Module containing the definition of wrappers for the compiler and generation of `JS` bindings //! for interoperability use std::collections::HashMap; diff --git a/src/lib.rs b/src/lib.rs index 3ea0dd1..d81ee2f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,10 +21,10 @@ #![doc = include_str!("../README.md")] //! # Example //! -//! Example usage of the compiler from Rust: +//! Example usage of the assembler from Rust: //! //! ``` -//! use creator_compiler::prelude::*; +//! use creator_assembler::prelude::*; //! use std::collections::HashMap; //! //! let arch_json = include_str!("../tests/architecture.json"); @@ -43,7 +43,7 @@ //! "; //! //! // Parse the code -//! let ast = parser::parse(arch.arch_conf.comment_prefix, code) +//! let ast = parser::parse(arch.config.comment_prefix, code) //! .map_err(|e| eprintln!("{}", e.clone().render("file.s", code, true))) //! .expect("The code should be valid"); //! 
// Compile the code diff --git a/src/main.rs b/src/main.rs index 55e418f..86839e7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -26,7 +26,7 @@ use clap::{Parser, Subcommand}; use num_bigint::BigUint; -use creator_compiler::prelude::*; +use creator_assembler::prelude::*; /// Command-line arguments parser #[derive(Parser)] diff --git a/src/parser.rs b/src/parser.rs index 7f9daa6..e51ac16 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -144,12 +144,19 @@ where // Instruction: `instruction -> ident [^\n]*` let instruction = ident .then( - any() - .and_is(newline().not()) + none_of([Token::Ctrl('\n')]) .map_with(|token, e| (token, e.span())) .repeated() .collect() - .map_with(|args, e| (args, e.span())), + .map_with(|args: Vec<_>, e| { + // Fix the span being wrong when there are no arguments (when sub-parser doesn't + // consume input). SEE: + let mut s: Span = e.span(); + if args.is_empty() { + s.start = s.end; + } + (args, s) + }), ) .map(|(name, args)| Statement::Instruction(InstructionNode { name, args })) .labelled("instruction"); @@ -189,14 +196,13 @@ macro_rules! 
parse_with { let src = $src.with_context(FileID::SRC); || -> Result<_, ParseError> { let tokens = lexer::lexer($comment_prefix).parse(src).into_result()?; - // TODO: replace with `chumsky::input::IterInput` on chumsky 0.10.2 (on 0.10.1 it - // doesn't implement the correct traits) let tokens = tokens.map(end, |(x, s)| (x, s)); let res = $parser.parse(tokens).into_result()?; Ok(res) }() }}; } +#[allow(unused_imports)] // This is used below, but clippy doesn't seem to detect it use parse_with; /// Parses the input creating an abstract syntax tree @@ -411,7 +417,17 @@ mod test { ("name\n", empty.clone()), ("name\r", empty.clone()), ("name\r\n", empty.clone()), + ("name \n", empty.clone()), ("name", empty), + ( + "name a ", + vec![instruction( + vec![], + ("name", 0..4), + (vec![(Token::Identifier("a".into()), 5..6)], 5..6), + 0..6, + )], + ), ( "name a\n", vec![instruction( diff --git a/src/parser/error.rs b/src/parser/error.rs index ea75d4b..c9b8b90 100644 --- a/src/parser/error.rs +++ b/src/parser/error.rs @@ -69,7 +69,7 @@ impl crate::RenderError for Vec> { // The implementation of ToString for Rich pattern adds quotes // automatically, but we will add them later RichPattern::Token(t) => t.to_string(), - RichPattern::Identifier(i) => i.to_string(), + RichPattern::Identifier(i) => i.clone(), _ => e.to_string(), }; format!( diff --git a/src/parser/expression.rs b/src/parser/expression.rs index decf6f6..6065f18 100644 --- a/src/parser/expression.rs +++ b/src/parser/expression.rs @@ -107,16 +107,16 @@ pub enum Expr { /// Operation to perform op: Spanned, /// Operand to perform the operation on - operand: Box>, + operand: Box>, }, /// Binary operation on other expressions BinaryOp { /// Operation to perform op: Spanned, /// Left operand of the operation - lhs: Box>, + lhs: Box>, /// Right operand of the operation - rhs: Box>, + rhs: Box>, }, } @@ -217,8 +217,6 @@ pub fn parser<'tokens, I>() -> Parser!('tokens, I, Spanned) where I: ValueInput<'tokens, Token = Token, Span 
= Span>, { - // Newline tokens - let newline = || just(Token::Ctrl('\n')).repeated(); // Literal values let literal = select! { Token::Integer(x) => Expr::Integer(x), @@ -232,11 +230,9 @@ where // Operator parser macro_rules! op { (:$name:literal: $($i:ident => $o:expr),+ $(,)?) => { - newline().ignore_then( - select! { $(Token::Operator(Operator::$i) => $o,)+ } - .map_with(|x, e| (x, e.span())) - .labelled(concat!($name, " operator")) - ) + select! { $(Token::Operator(Operator::$i) => $o,)+ } + .map_with(|x, e| (x, e.span())) + .labelled(concat!($name, " operator")) }; ($($i:ident => $o:expr),+ $(,)?) => { op!(:"binary": $($i => $o,)+) }; } @@ -260,15 +256,8 @@ where } recursive(|expr| { - // NOTE: newlines before atoms (literal numbers/parenthesized expressions) and operators - // are allowed so that expressions may span multiple lines. Newlines aren't allowed after - // them to prevent them from consuming new lines required to end statements - // paren_expr: `paren_expr -> ( expression )` - let paren_expr = expr.delimited_by( - just(Token::Ctrl('(')), - newline().ignore_then(just(Token::Ctrl(')'))), - ); + let paren_expr = expr.delimited_by(just(Token::Ctrl('(')), just(Token::Ctrl(')'))); // modifier: `modifier -> % ident paren_expr` let modifier = just(Token::Operator(Operator::Percent)) .ignore_then(select! 
{Token::Identifier(name) => name }.labelled("identifier")) @@ -281,9 +270,8 @@ where // Remove span to replace it with one including the parenthesis let paren_expr = paren_expr.map(|(x, _)| x); - // atom: `atom -> \n* (literal | modifier | paren_expr)` + // atom: `atom -> literal | modifier | paren_expr` let atom = choice((literal, modifier, paren_expr)).map_with(|atom, e| (atom, e.span())); - let atom = newline().ignore_then(atom); let atom = atom.labelled("expression").as_context(); let high_precedence = op!( @@ -394,7 +382,7 @@ mod test { test([ ("16", span(Expr::Integer(16u8.into()), 0..2), Ok(16.into())), ( - "\n\n16", + "\t 16", span(Expr::Integer(16u8.into()), 2..4), Ok(16.into()), ), @@ -490,12 +478,12 @@ mod test { Ok((2.2, 1..4).into()), ), ( - "\n\n+\n2", + "\t + 2", un_op((UnaryOp::Plus, 2..3), int(2, 4..5)), Ok(2.into()), ), ( - "\n\n+\n2.2", + " \t+\t2.2", un_op((UnaryOp::Plus, 2..3), float(2.2, 4..7)), Ok((2.2, 4..7).into()), ), @@ -541,7 +529,7 @@ mod test { Ok(12.into()), ), ( - "\n5 \n\n+ \n7", + "\t5 \t\t+ \t7", bin_op((BinaryOp::Add, 5..6), int(5, 1..2), int(7, 8..9)), Ok(12.into()), ), @@ -600,7 +588,7 @@ mod test { Ok(35.into()), ), ( - "\n5 \n\n* \n7", + "\t5 \t\t* \t7", bin_op((BinaryOp::Mul, 5..6), int(5, 1..2), int(7, 8..9)), Ok(35.into()), ), @@ -689,7 +677,7 @@ mod test { Ok(0b0110.into()), ), ( - "\n0b0101 \n\n^ \n0b0011", + "\t0b0101 \t\t^ \t0b0011", bin_op( (BinaryOp::BitwiseXOR, 10..11), int(0b0101, 1..7), @@ -698,7 +686,7 @@ mod test { Ok(0b0110.into()), ), ( - "\n0b0101 \n\n^ \n1.1", + "\t0b0101 \t\t^ \t1.1", bin_op( (BinaryOp::BitwiseXOR, 10..11), int(0b0101, 1..7), @@ -878,7 +866,7 @@ mod test { Ok(0.into()), ), ( - "1 + \n(\n2 - 3\n)", + "1 + \t(\t2 - 3\t)", bin_op( (BinaryOp::Add, 2..3), int(1, 0..1), @@ -916,7 +904,7 @@ mod test { Ok(0.into()), ), ( - "\n- \n\n+ \n1", + "\t- \t\t+ \t1", un_op( (UnaryOp::Minus, 1..2), un_op((UnaryOp::Plus, 5..6), int(1, 8..9)), diff --git a/src/parser/instruction.rs b/src/parser/instruction.rs 
index deb3f9a..db860c7 100644 --- a/src/parser/instruction.rs +++ b/src/parser/instruction.rs @@ -25,6 +25,7 @@ use chumsky::{input::MappedInput, prelude::*}; use regex::Regex; +use std::fmt::Write; use std::sync::LazyLock; use super::{expression, expression::Expr, lexer, ParseError, Span, Spanned, Token}; @@ -46,8 +47,6 @@ pub type ParsedArgs = Vec; /// Input type to be used with instruction argument parsers // NOTE: we need to name this input type to be able to box the parsers, which is required to store // them on a struct -// TODO: replace with `chumsky::input::IterInput` on chumsky 0.10.2 (on 0.10.1 it doesn't implement -// the correct traits) type TokenInput<'src> = MappedInput], fn(&Spanned) -> (&Token, &Span)>; @@ -56,7 +55,12 @@ type BoxedParser<'src> = super::Parser!(boxed: 'src, TokenInput<'src>, ParsedArg /// Instruction parser wrapper #[derive(Clone)] -pub struct Instruction(BoxedParser<'static>); +pub struct Instruction { + /// Parser for the syntax + parser: BoxedParser<'static>, + /// Human-readable syntax + syntax: String, +} /// Instruction statement AST node with references to data pub type InstructionNodeRef<'src> = (Spanned<&'src str>, Spanned<&'src [Spanned]>); @@ -87,9 +91,11 @@ impl Instruction { /// # Errors /// /// Errors if the syntax specification is invalid + #[allow(clippy::missing_panics_doc)] // Function should never panic pub fn build(fmt: &str, fields: &[InstructionField]) -> Result { // Regex for a instruction argument placeholder static FIELD: LazyLock = crate::regex!(r"^[fF][0-9]+$"); + static WRITE_EXPECT: &str = "Writing to an in-memory vector can't fail"; // Gets the field number the placeholder points to and validates that it has a correct type let field = |ident: String, no_co: bool| -> Result { @@ -117,6 +123,7 @@ impl Instruction { // Creates an initial dummy parser that consumes no input let parser = any().ignored().or(end()).rewind(); + let mut syntax = String::with_capacity(fmt.len()); // Validate the first token is a 
field placeholder pointing to the opcode/instruction name let mut parser = parser .to(match tokens.next() { @@ -125,6 +132,7 @@ impl Instruction { let i = field(ident, false)?; match fields[i].r#type { FieldType::Co => { + write!(syntax, "{} ", fields[i].name).expect(WRITE_EXPECT); // NOTE: This value should never be read, we only need it to point to the // opcode instruction field vec![ParsedArgument { @@ -144,12 +152,18 @@ impl Instruction { .boxed(); // Iterate through the remaining tokens + let mut prev_symbol = true; for token in tokens { // Append the current token parser to the parser being created parser = match token { // The current token is an argument placeholder => parse an expression/identifier Token::Identifier(ident) if FIELD.is_match(&ident) => { let field_idx = field(ident, true)?; // Validate the field pointed to + if !prev_symbol { + syntax.push(' '); + } + write!(syntax, "{}", fields[field_idx].name).expect(WRITE_EXPECT); + prev_symbol = false; parser .then(expression::parser()) .map(move |(mut args, value)| { @@ -160,11 +174,36 @@ impl Instruction { } // The current token isn't an argument placeholder => parse it literally, ignoring // its output - _ => parser.then_ignore(just(token)).boxed(), + _ => { + let symbol = matches!( + token, + Token::Operator(_) | Token::Ctrl(_) | Token::Literal(_) + ); + + if !prev_symbol && !symbol { + syntax.push(' '); + } + match &token { + Token::Integer(n) => write!(syntax, "{n}"), + Token::Float(x) => write!(syntax, "{}", f64::from(*x)), + Token::String(s) => write!(syntax, "\"{s}\""), + Token::Character(c) => write!(syntax, "\'{c}\'"), + Token::Identifier(i) => write!(syntax, "{i}"), + Token::Label(l) => write!(syntax, "{l}:"), + Token::Directive(d) => write!(syntax, "{d}"), + Token::Operator(c) => write!(syntax, "{c}"), + Token::Ctrl(',') => write!(syntax, ", "), + Token::Ctrl(c) | Token::Literal(c) => write!(syntax, "{c}"), + } + .expect(WRITE_EXPECT); + prev_symbol = symbol; + 
parser.then_ignore(just(token)).boxed() + } } } + syntax.truncate(syntax.trim_end().len()); // Check that there is no remaining input in the syntax and create the final parser - Ok(Self(parser)) + Ok(Self { parser, syntax }) } /// Parses the arguments of an instruction according to the syntax @@ -193,7 +232,7 @@ impl Instruction { // SAFETY: This is safe because the stored parser has a lifetime of `'static`, so we will // only ever reduce its lifetime. Since lifetimes are removed during monomorphisation, the // parser must be valid for arbitrary lifetimes. - unsafe { &*(&raw const self.0).cast() } + unsafe { &*(&raw const self.parser).cast() } } /// Lexes an instruction represented as a string @@ -236,12 +275,20 @@ impl Instruction { let input = tokens.map(end, |(x, s)| (x, s)); Ok(parser.parse(input).into_result()?) } + + /// Returns a human-readable representation of the syntax + #[must_use] + pub fn syntax(&self) -> &str { + &self.syntax + } } // Boxed parsers don't implement `Debug`, so we need to implement it manually as an opaque box impl std::fmt::Debug for Instruction { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_tuple("InstructionParser").finish() + f.debug_tuple("InstructionParser") + .field(&self.syntax) + .finish() } } @@ -254,8 +301,8 @@ mod test { #[must_use] fn fields() -> [InstructionField<'static, ()>; 3] { - let field = |co| InstructionField { - name: "", + let field = |co, name| InstructionField { + name, r#type: if co { FieldType::Co } else { @@ -263,7 +310,7 @@ mod test { }, range: (), }; - [field(true), field(false), field(false)] + [field(true, "name"), field(false, "a"), field(false, "b")] } fn parse(parser: &Instruction, src: &str) -> Result { @@ -300,6 +347,7 @@ mod test { #[test] fn no_args() { let parser = Instruction::build("F0", &fields()).unwrap(); + assert_eq!(parser.syntax(), "name"); assert_eq!(parse(&parser, ""), Ok(vec![co_arg()])); assert_eq!(parse(&parser, "a"), Err(())); } @@ -307,6 +355,7 @@ 
mod test { #[test] fn one_arg() { let parser = Instruction::build("F0 F1", &fields()).unwrap(); + assert_eq!(parser.syntax(), "name a"); assert_eq!(parse(&parser, ""), Err(())); assert_eq!(parse(&parser, ","), Err(())); assert_eq!(parse(&parser, "$"), Err(())); @@ -357,6 +406,7 @@ mod test { #[test] fn multiple_arg() { let parser = Instruction::build("F0 F2 F1", &fields()).unwrap(); + assert_eq!(parser.syntax(), "name b a"); assert_eq!(parse(&parser, ""), Err(())); assert_eq!(parse(&parser, ","), Err(())); assert_eq!(parse(&parser, "a"), Err(())); @@ -382,6 +432,7 @@ mod test { #[test] fn comma_separator() { let parser = Instruction::build("F0 F1, F2", &fields()).unwrap(); + assert_eq!(parser.syntax(), "name a, b"); assert_eq!(parse(&parser, "1 2"), Err(())); assert_eq!( parse(&parser, "1, 2"), @@ -396,6 +447,7 @@ mod test { #[test] fn literals() { let parser = Instruction::build("F0 ,1 F1 $(F2)", &fields()).unwrap(); + assert_eq!(parser.syntax(), "name , 1 a$(b)"); assert_eq!(parse(&parser, "2 5"), Err(())); assert_eq!(parse(&parser, ",1 2 5"), Err(())); assert_eq!(parse(&parser, ",1 2 (5)"), Err(())); @@ -410,6 +462,7 @@ mod test { ]) ); let parser = Instruction::build("F0 1 * -F1", &fields()).unwrap(); + assert_eq!(parser.syntax(), "name 1*-a"); assert_eq!(parse(&parser, "2"), Err(())); assert_eq!(parse(&parser, "-2"), Err(())); assert_eq!(parse(&parser, "* -2"), Err(())); @@ -420,6 +473,7 @@ mod test { Ok(vec![co_arg(), arg((number(2), 5..6), 1)]) ); let parser = Instruction::build("F0 aF1 F1a F2", &fields()).unwrap(); + assert_eq!(parser.syntax(), "name aF1 F1a b"); assert_eq!(parse(&parser, "1 1 2"), Err(())); assert_eq!(parse(&parser, "a1 1a 2"), Err(())); assert_eq!(parse(&parser, "aF1 f1a 2"), Err(())); diff --git a/tests/architecture.json b/tests/architecture.json index 9875d1a..1adc77d 100644 --- a/tests/architecture.json +++ b/tests/architecture.json @@ -1,42 +1,15 @@ { - "arch_conf": [ - { - "name": "Name", - "value": "Test" - }, - { - "name": "Bits", 
- "value": "32" - }, - { - "name": "Description", - "value": "Test architecture" - }, - { - "name": "Data Format", - "value": "big_endian" - }, - { - "name": "Memory Alignment", - "value": "1" - }, - { - "name": "Main Function", - "value": "main" - }, - { - "name": "Passing Convention", - "value": "1" - }, - { - "name": "Sensitive Register Name", - "value": "1" - }, - { - "name": "CommentPrefix", - "value": "#" - } - ], + "config": { + "name": "Test", + "word_size": 32, + "description": "Test architecture", + "endianness": "big_endian", + "memory_alignment": true, + "main_function": "main", + "passing_convention": true, + "sensitive_register_name": true, + "comment_prefix": "#" + }, "components": [ { "name": "Control registers", @@ -44,28 +17,25 @@ "double_precision": false, "elements": [ { - "name": [ - "PC" - ], - "nbits": "32", + "name": ["PC"], + "nbits": 32, + "encoding": 0, "value": 0, "default_value": 0, "properties": [] }, { - "name": [ - "ctrl1" - ], - "nbits": "32", + "name": ["ctrl1"], + "nbits": 32, + "encoding": 1, "value": 0, "default_value": 0, "properties": [] }, { - "name": [ - "ctrl2" - ], - "nbits": "32", + "name": ["ctrl2"], + "nbits": 32, + "encoding": 2, "value": 0, "default_value": 0, "properties": [] @@ -78,30 +48,25 @@ "double_precision": false, "elements": [ { - "name": [ - "x0" - ], - "nbits": "32", + "name": ["x0"], + "nbits": 32, + "encoding": 0, "value": 0, "default_value": 0, "properties": [] }, { - "name": [ - "x1" - ], - "nbits": "32", + "name": ["x1"], + "nbits": 32, + "encoding": 1, "value": 0, "default_value": 0, "properties": [] }, { - "name": [ - "x2", - "two", - "2" - ], - "nbits": "32", + "name": ["x2", "two", "2"], + "nbits": 32, + "encoding": 2, "value": 0, "default_value": 0, "properties": [] @@ -112,33 +77,27 @@ "name": "Floating point registers", "type": "fp_registers", "double_precision": true, - "double_precision_type": "extended", "elements": [ { - "name": [ - "ft0" - ], - "nbits": "64", + "name": ["ft0"], + "nbits": 
64, + "encoding": 0, "value": 0.0, "default_value": 0.0, "properties": [] }, { - "name": [ - "F1", - "ft1" - ], - "nbits": "64", + "name": ["F1", "ft1"], + "nbits": 64, + "encoding": 1, "value": 0.0, "default_value": 0.0, "properties": [] }, { - "name": [ - "Field2", - "ft2" - ], - "nbits": "64", + "name": ["Field2", "ft2"], + "nbits": 64, + "encoding": 2, "value": 0.0, "default_value": 0.0, "properties": [] @@ -151,77 +110,27 @@ "double_precision": false, "elements": [ { - "name": [ - "fs0" - ], - "nbits": "32", - "value": 0.0, - "default_value": 0.0, - "properties": [] - }, - { - "name": [ - "fs1" - ], - "nbits": "32", - "value": 0.0, - "default_value": 0.0, - "properties": [] - }, - { - "name": [ - "fs2" - ], - "nbits": "32", - "value": 0.0, - "default_value": 0.0, - "properties": [] - } - ] - }, - { - "name": "Double floating point registers", - "type": "fp_registers", - "double_precision": true, - "double_precision_type": "linked", - "elements": [ - { - "name": [ - "FD0" - ], - "nbits": "64", + "name": ["fs0"], + "nbits": 32, + "encoding": 0, "value": 0.0, "default_value": 0.0, - "simple_reg": [ - "fs0", - "fs1" - ], "properties": [] }, { - "name": [ - "FD1" - ], - "nbits": "64", + "name": ["f1", "fs1"], + "nbits": 32, + "encoding": 1, "value": 0.0, "default_value": 0.0, - "simple_reg": [ - "fs1", - "fs2" - ], "properties": [] }, { - "name": [ - "FD2" - ], - "nbits": "64", + "name": ["fs2"], + "nbits": 32, + "encoding": 2, "value": 0.0, "default_value": 0.0, - "simple_reg": [ - "fs2", - "fs3" - ], "properties": [] } ] @@ -232,8 +141,6 @@ "name": "nop", "type": "Arithmetic integer", "signature_definition": "F0", - "signature": "nop", - "signatureRaw": "nop", "co": "1111111", "cop": "0000000000", "nwords": 1, @@ -250,19 +157,15 @@ "type": "cop", "startbit": 31, "stopbit": 28, - "valueField": "1111" + "value": "1111" } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "nop2", "type": "Arithmetic integer", 
"signature_definition": "F0", - "signature": "nop2", - "signatureRaw": "nop2", "co": "1000001", "cop": "0000000000", "nwords": 2, @@ -279,19 +182,15 @@ "type": "cop", "startbit": 63, "stopbit": 60, - "valueField": "1001" + "value": "1001" } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "imm", "type": "Arithmetic integer", - "signature_definition": "F0 F1 F2 F3", - "signature": "inm,inm-signed,inm-unsigned,address", - "signatureRaw": "inm inms inmu addr", + "signature_definition": "F0 F1, F2, F3", "co": "0000000", "cop": "0000000000", "nwords": 1, @@ -304,14 +203,14 @@ "stopbit": 0 }, { - "name": "inms", - "type": "inm-signed", + "name": "imms", + "type": "imm-signed", "startbit": 29, "stopbit": 26 }, { - "name": "inmu", - "type": "inm-unsigned", + "name": "immu", + "type": "imm-unsigned", "startbit": 8, "stopbit": 1 }, @@ -322,16 +221,12 @@ "stopbit": 10 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "reg", "type": "Arithmetic integer", - "signature_definition": "F0 F1 F2 F3 F4", - "signature": "reg,Ctrl-Reg,INT-Reg,SFP-Reg,DFP-Reg", - "signatureRaw": "reg ctrl int sfp dfp", + "signature_definition": "F0 F1, F2, F3, F4", "co": "0000000", "cop": "0000000000", "nwords": 1, @@ -368,16 +263,12 @@ "stopbit": 0 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "off", "type": "Arithmetic integer", - "signature_definition": "F0 F1 F2", - "signature": "off,offset_bytes,offset_words", - "signatureRaw": "off bytes words", + "signature_definition": "F0 F1, F2", "co": "0000000", "cop": "0000000000", "nwords": 1, @@ -402,16 +293,12 @@ "stopbit": 0 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "multi", "type": "Arithmetic integer", "signature_definition": "F0 F1", - "signature": "multi,inm-unsigned", - "signatureRaw": "multi imm4", "co": "1110011", "cop": "0000000000", "nwords": 1, @@ -425,21 +312,17 @@ }, { "name": 
"imm4", - "type": "inm-unsigned", + "type": "imm-unsigned", "startbit": 31, "stopbit": 28 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "multi", "type": "Arithmetic integer", "signature_definition": "F0 $", - "signature": "multi,$", - "signatureRaw": "multi $", "co": "1011101", "cop": "0000000000", "nwords": 1, @@ -452,16 +335,12 @@ "stopbit": 0 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "multi", "type": "Arithmetic integer", "signature_definition": "F0 F1", - "signature": "multi,inm-unsigned", - "signatureRaw": "multi imm5", "co": "1000001", "cop": "0000000000", "nwords": 1, @@ -475,21 +354,17 @@ }, { "name": "imm5", - "type": "inm-unsigned", + "type": "imm-unsigned", "startbit": 31, "stopbit": 27 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "pad", "type": "Arithmetic integer", - "signature_definition": "F0 F1 F2", - "signature": "multi,inm-signed,inm-signed", - "signatureRaw": "multi imm5 imm6", + "signature_definition": "F0 F1, F2", "co": "1111101", "cop": "0000000000", "nwords": 1, @@ -503,29 +378,25 @@ }, { "name": "imm5", - "type": "inm-signed", + "type": "imm-signed", "startbit": 31, "stopbit": 29, "padding": 2 }, { "name": "imm6", - "type": "inm-signed", + "type": "imm-signed", "startbit": 5, "stopbit": 2, "padding": 2 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "enum", "type": "Arithmetic integer", - "signature_definition": "F0 F1 F2 F3 F4", - "signature": "enum,enum,enum,enum,enum", - "signatureRaw": "enum enum1 enum2 enum3 enum4", + "signature_definition": "F0 F1, F2, F3, F4", "co": "1111111", "cop": "0000000000", "nwords": 1, @@ -566,16 +437,12 @@ "enum_name": "test" } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "lit", "type": "Arithmetic integer", - "signature_definition": "F0 F1a aF1 F1", - "signature": "lit,F1a,aF1,inm-signed", - 
"signatureRaw": "lit F1a aF1 imm", + "signature_definition": "F0 F1a, aF1, F1", "co": "1111000", "cop": "0000000000", "nwords": 1, @@ -589,14 +456,12 @@ }, { "name": "imm", - "type": "inm-signed", + "type": "imm-signed", "startbit": 3, "stopbit": 0 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" } ], "pseudoinstructions": [], @@ -624,7 +489,7 @@ { "name": ".zero", "action": "space", - "size": "1" + "size": 1 }, { "name": ".align", @@ -649,22 +514,22 @@ { "name": ".byte", "action": "byte", - "size": "1" + "size": 1 }, { "name": ".half", "action": "half_word", - "size": "2" + "size": 2 }, { "name": ".word", "action": "word", - "size": "4" + "size": 4 }, { "name": ".dword", "action": "double_word", - "size": "8" + "size": 8 }, { "name": ".float", @@ -682,48 +547,28 @@ "size": null } ], - "memory_layout": [ - { - "name": "ktext start", - "value": "0x00000020" - }, - { - "name": "ktext end", - "value": "0x0000002f" - }, - { - "name": "kdata start", - "value": "0x00000030" - }, - { - "name": "kdata end", - "value": "0x0000003f" - }, - { - "name": "text start", - "value": "0x00000000" - }, - { - "name": "text end", - "value": "0x0000000f" - }, - { - "name": "data start", - "value": "0x00000010" - }, - { - "name": "data end", - "value": "0x0000001f" - }, - { - "name": "stack start", - "value": "0x0FFFFFFC" - }, - { - "name": "stack end", - "value": "0x0FFFFFFF" + "memory_layout": { + "ktext": { + "start": 32, + "end": 47 + }, + "kdata": { + "start": 48, + "end": 63 + }, + "text": { + "start": 0, + "end": 15 + }, + "data": { + "start": 16, + "end": 31 + }, + "stack": { + "start": 268435452, + "end": 268435455 } - ], + }, "enums": { "enum1": { "a": 1, diff --git a/tests/architecture2.json b/tests/architecture2.json index 4bc5e0f..4950dd1 100644 --- a/tests/architecture2.json +++ b/tests/architecture2.json @@ -1,42 +1,15 @@ { - "arch_conf": [ - { - "name": "Name", - "value": "Test" - }, - { - "name": "Bits", - "value": "32" - }, - { - "name": 
"Description", - "value": "Test architecture" - }, - { - "name": "Data Format", - "value": "big_endian" - }, - { - "name": "Memory Alignment", - "value": "1" - }, - { - "name": "Main Function", - "value": "main" - }, - { - "name": "Passing Convention", - "value": "1" - }, - { - "name": "Sensitive Register Name", - "value": "0" - }, - { - "name": "CommentPrefix", - "value": "#" - } - ], + "config": { + "name": "Test", + "word_size": 32, + "description": "Test architecture", + "endianness": "big_endian", + "memory_alignment": true, + "main_function": "main", + "passing_convention": true, + "sensitive_register_name": false, + "comment_prefix": "#" + }, "components": [ { "name": "Integer registers", @@ -44,11 +17,9 @@ "double_precision": false, "elements": [ { - "name": [ - "x0", - "ZeRo" - ], - "nbits": "32", + "name": ["x0", "ZeRo"], + "nbits": 32, + "encoding": 0, "value": 0, "default_value": 0, "properties": [] @@ -61,8 +32,6 @@ "name": "nop", "type": "Arithmetic integer", "signature_definition": "F0", - "signature": "nop", - "signatureRaw": "nop", "co": "1111111", "cop": "0000000000", "nwords": 1, @@ -79,19 +48,15 @@ "type": "cop", "startbit": 31, "stopbit": 28, - "valueField": "1111" + "value": "1111" } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "ctrl", "type": "Arithmetic floating point", "signature_definition": "F0 F1", - "signature": "ctrl,Ctrl-Reg", - "signatureRaw": "ctrl reg", "co": "1111111", "cop": "0000000000", "nwords": 1, @@ -110,16 +75,12 @@ "stopbit": 28 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "int", "type": "Arithmetic integer", "signature_definition": "F0 F1", - "signature": "int,INT-Reg", - "signatureRaw": "int reg", "co": "1111111", "cop": "0000000000", "nwords": 1, @@ -138,16 +99,12 @@ "stopbit": 28 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "float", "type": "Arithmetic floating point", 
"signature_definition": "F0 F1", - "signature": "float,SFP-Reg", - "signatureRaw": "float reg", "co": "1111111", "cop": "0000000000", "nwords": 1, @@ -166,16 +123,12 @@ "stopbit": 28 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" }, { "name": "double", "type": "Arithmetic floating point", "signature_definition": "F0 F1", - "signature": "double,DFP-Reg", - "signatureRaw": "double reg", "co": "1111111", "cop": "0000000000", "nwords": 1, @@ -194,9 +147,7 @@ "stopbit": 28 } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" } ], "pseudoinstructions": [], @@ -224,49 +175,29 @@ { "name": ".zero", "action": "space", - "size": "1" + "size": 1 } ], - "memory_layout": [ - { - "name": "ktext start", - "value": "0x00000000" - }, - { - "name": "ktext end", - "value": "0x0000000f" - }, - { - "name": "kdata start", - "value": "0x00000010" - }, - { - "name": "kdata end", - "value": "0x0000001f" - }, - { - "name": "text start", - "value": "0x00000020" - }, - { - "name": "text end", - "value": "0x0000002f" - }, - { - "name": "data start", - "value": "0x00000030" - }, - { - "name": "data end", - "value": "0x0000003f" - }, - { - "name": "stack start", - "value": "0x0FFFFFFC" - }, - { - "name": "stack end", - "value": "0x0FFFFFFF" + "memory_layout": { + "ktext": { + "start": 0, + "end": 15 + }, + "kdata": { + "start": 16, + "end": 31 + }, + "text": { + "start": 32, + "end": 47 + }, + "data": { + "start": 48, + "end": 63 + }, + "stack": { + "start": 268435452, + "end": 268435455 } - ] + } } diff --git a/tests/architecture_no_kernel.json b/tests/architecture_no_kernel.json index 488fda3..ea5d557 100644 --- a/tests/architecture_no_kernel.json +++ b/tests/architecture_no_kernel.json @@ -1,42 +1,15 @@ { - "arch_conf": [ - { - "name": "Name", - "value": "Test" - }, - { - "name": "Bits", - "value": "32" - }, - { - "name": "Description", - "value": "Test architecture" - }, - { - "name": "Data Format", - "value": "big_endian" - }, - { - 
"name": "Memory Alignment", - "value": "1" - }, - { - "name": "Main Function", - "value": "main" - }, - { - "name": "Passing Convention", - "value": "1" - }, - { - "name": "Sensitive Register Name", - "value": "1" - }, - { - "name": "CommentPrefix", - "value": "#" - } - ], + "config": { + "name": "Test", + "word_size": 32, + "description": "Test architecture", + "endianness": "big_endian", + "memory_alignment": true, + "main_function": "main", + "passing_convention": true, + "sensitive_register_name": false, + "comment_prefix": "#" + }, "components": [ { "name": "Integer registers", @@ -44,10 +17,9 @@ "double_precision": false, "elements": [ { - "name": [ - "x0" - ], - "nbits": "32", + "name": ["x0"], + "nbits": 32, + "encoding": 0, "value": 0, "default_value": 0, "properties": [] @@ -60,8 +32,6 @@ "name": "nop", "type": "Arithmetic integer", "signature_definition": "F0", - "signature": "nop", - "signatureRaw": "nop", "co": "1111111", "cop": "0000000000", "nwords": 1, @@ -78,12 +48,10 @@ "type": "cop", "startbit": 31, "stopbit": 28, - "valueField": "1111" + "value": "1111" } ], - "definition": "", - "separated": [], - "help": "" + "definition": "" } ], "pseudoinstructions": [], @@ -111,33 +79,21 @@ { "name": ".zero", "action": "space", - "size": "1" + "size": 1 } ], - "memory_layout": [ - { - "name": "text start", - "value": "0x00000000" - }, - { - "name": "text end", - "value": "0x0000000f" - }, - { - "name": "data start", - "value": "0x00000010" - }, - { - "name": "data end", - "value": "0x0000001f" - }, - { - "name": "stack start", - "value": "0x0FFFFFFC" - }, - { - "name": "stack end", - "value": "0x0FFFFFFF" + "memory_layout": { + "text": { + "start": 0, + "end": 15 + }, + "data": { + "start": 16, + "end": 31 + }, + "stack": { + "start": 268435452, + "end": 268435455 } - ] + } }