diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..5c917c4 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,36 @@ +name: CI +on: + pull_request: + push: + branches: [main] + +jobs: + go-test: + name: Go Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: '1.24.x' + - run: go test ./... + + js-test: + name: JS Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: '1.24.x' + - uses: actions/setup-node@v4 + with: + node-version: '20.x' + - name: Install wasm-opt + run: sudo apt-get install -y binaryen + - name: Build and test + run: | + cd js + npm ci + npm run build + node --test dist/format.test.js diff --git a/js/README.md b/js/README.md index cffbb2e..32b69bb 100644 --- a/js/README.md +++ b/js/README.md @@ -1,6 +1,6 @@ # `@reteps/dockerfmt` -Bindings around the Golang `dockerfmt` tooling. It uses [tinygo](https://github.com/tinygo-org/tinygo) to compile the Go code to WebAssembly, which is then used in the JS bindings. +Bindings around the Golang `dockerfmt` tooling. It compiles the Go code to WebAssembly (using standard Go's `GOOS=js GOARCH=wasm` target), which is then used in the JS bindings. ```js diff --git a/js/format.go b/js/format.go index 58cd9b4..bb43e75 100644 --- a/js/format.go +++ b/js/format.go @@ -1,26 +1,37 @@ -//go:build js || wasm -// +build js wasm +// WASM entry point for the JS bindings. Built with standard Go (GOOS=js +// GOARCH=wasm), not TinyGo. 
TinyGo produces smaller binaries but has a +// blocking bug: +// - reflect.AssignableTo panics with interfaces, breaking encoding/json +// used by the moby/buildkit parser (https://github.com/tinygo-org/tinygo/issues/4277) + +//go:build js && wasm package main import ( "strings" + "syscall/js" "github.com/reteps/dockerfmt/lib" ) -//export formatBytes -func formatBytes(contents []byte, indentSize uint, newlineFlag bool, spaceRedirects bool) *byte { - originalLines := strings.SplitAfter(string(contents), "\n") +func formatBytes(_ js.Value, args []js.Value) any { + contents := args[0].String() + indentSize := uint(args[1].Int()) + newlineFlag := args[2].Bool() + spaceRedirects := args[3].Bool() + + originalLines := strings.SplitAfter(contents, "\n") c := &lib.Config{ IndentSize: indentSize, TrailingNewline: newlineFlag, SpaceRedirects: spaceRedirects, } - result := lib.FormatFileLines(originalLines, c) - bytes := []byte(result) - return &bytes[0] + return lib.FormatFileLines(originalLines, c) } -// Required to build -func main() {} +func main() { + js.Global().Set("__dockerfmt_formatBytes", js.FuncOf(formatBytes)) + // Block forever to keep the Go runtime alive for subsequent calls. 
+ select {} +} diff --git a/js/format.test.ts b/js/format.test.ts new file mode 100644 index 0000000..40c4857 --- /dev/null +++ b/js/format.test.ts @@ -0,0 +1,82 @@ +import { describe, it } from 'node:test' +import assert from 'node:assert/strict' +import { formatDockerfileContents } from './node.js' + +const defaultOptions = { + indent: 4, + trailingNewline: true, + spaceRedirects: false, +} + +describe('formatDockerfileContents', () => { + it('formats a basic Dockerfile', async () => { + const input = `from alpine +run echo hello +`.trim() + + const result = await formatDockerfileContents(input, defaultOptions) + assert.equal(result, 'FROM alpine\nRUN echo hello\n') + }) + + it('formats CMD JSON form with spaces', async () => { + const input = `FROM alpine +CMD ["ls","-la"] +`.trim() + + const result = await formatDockerfileContents(input, defaultOptions) + assert.equal(result, 'FROM alpine\nCMD ["ls", "-la"]\n') + }) + + it('formats RUN JSON form with spaces', async () => { + const input = `FROM alpine +RUN ["echo","hello"] +`.trim() + + const result = await formatDockerfileContents(input, defaultOptions) + assert.equal(result, 'FROM alpine\nRUN ["echo", "hello"]\n') + }) + + it('handles the issue #25 reproduction case', async () => { + const input = ` +FROM nginx +WORKDIR /app +ARG PROJECT_DIR=/ +ARG NGINX_CONF=nginx.conf +COPY $NGINX_CONF /etc/nginx/conf.d/nginx.conf +COPY $PROJECT_DIR /app +CMD mkdir --parents /var/log/nginx && nginx -g "daemon off;" +`.trim() + + const result = await formatDockerfileContents(input, { + indent: 4, + spaceRedirects: false, + trailingNewline: true, + }) + + assert.ok(result.includes('FROM nginx')) + assert.ok(result.includes('WORKDIR /app')) + assert.ok(result.endsWith('\n')) + }) + + it('respects trailingNewline: false', async () => { + const input = 'FROM alpine' + const result = await formatDockerfileContents(input, { + ...defaultOptions, + trailingNewline: false, + }) + assert.ok(!result.endsWith('\n')) + }) + + 
it('respects indent option', async () => { + const input = `FROM alpine +RUN echo a \\ + && echo b +`.trim() + + const result = await formatDockerfileContents(input, { + ...defaultOptions, + indent: 2, + }) + assert.ok(result.includes(' && echo b')) + }) +}) diff --git a/js/format.ts b/js/format.ts index a4e3102..f34d340 100644 --- a/js/format.ts +++ b/js/format.ts @@ -12,54 +12,34 @@ export const formatDockerfileContents = async ( getWasm: () => Promise, ) => { const go = new Go() // Defined in wasm_exec.js - const encoder = new TextEncoder() - const decoder = new TextDecoder() - // get current working directory const wasmBuffer = await getWasm() const wasm = await WebAssembly.instantiate(wasmBuffer, go.importObject) /** * Do not await this promise, because it only resolves once the go main() * function has exited. But we need the main function to stay alive to be - * able to call the `parse` and `print` function. + * able to call the formatBytes function. */ go.run(wasm.instance) - const { memory, malloc, free, formatBytes } = wasm.instance.exports as { - memory: WebAssembly.Memory - malloc: (size: number) => number - free: (pointer: number) => void - formatBytes: ( - pointer: number, - length: number, - indent: number, - trailingNewline: boolean, - spaceRedirects: boolean, - ) => number - } - - const fileBufferBytes = encoder.encode(fileContents) - const filePointer = malloc(fileBufferBytes.byteLength) + const formatBytes = (globalThis as any).__dockerfmt_formatBytes as ( + contents: string, + indent: number, + trailingNewline: boolean, + spaceRedirects: boolean, + ) => string - new Uint8Array(memory.buffer).set(fileBufferBytes, filePointer) + if (typeof formatBytes !== 'function') { + throw new Error('dockerfmt WASM module did not register formatBytes') + } - // Call formatBytes function from WebAssembly - const resultPointer = formatBytes( - filePointer, - fileBufferBytes.byteLength, + return formatBytes( + fileContents, options.indent, 
options.trailingNewline, options.spaceRedirects, ) - - // Decode the result - const resultBytes = new Uint8Array(memory.buffer).subarray(resultPointer) - const end = resultBytes.indexOf(0) - const result = decoder.decode(resultBytes.subarray(0, end)) - free(filePointer) - - return result } export const formatDockerfile = () => { diff --git a/js/format.wasm b/js/format.wasm index 4c0ceba..7d1f40c 100644 Binary files a/js/format.wasm and b/js/format.wasm differ diff --git a/js/package.json b/js/package.json index 24e103c..0774c61 100644 --- a/js/package.json +++ b/js/package.json @@ -29,9 +29,8 @@ "dist" ], "scripts": { - "//": "Requires tinygo 0.38.0 or later", "build": "npm run build-go && npm run build-js", - "build-go": "tinygo build -o format.wasm -target wasm --no-debug", + "build-go": "GOOS=js GOARCH=wasm go build -ldflags='-s -w' -o format.wasm && wasm-opt --enable-bulk-memory -Oz -o format-opt.wasm format.wasm && mv format-opt.wasm format.wasm", "build-js": "tsc && cp format.wasm wasm_exec.js dist", "format": "prettier --write \"**/*.{js,ts,json}\"" }, diff --git a/js/wasm_exec.js b/js/wasm_exec.js index 9de9969..d71af9e 100644 --- a/js/wasm_exec.js +++ b/js/wasm_exec.js @@ -1,54 +1,26 @@ -// https://github.com/tinygo-org/tinygo/blob/64d8a043084cb9a56763192000e40ddc5dce733f/targets/wasm_exec.js // Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// -// This file has been modified for use by the TinyGo compiler. -(() => { - // Map multiple JavaScript environments to a single common API, - // preferring web standards over Node.js API. 
- // - // Environments considered: - // - Browsers - // - Node.js - // - Electron - // - Parcel - - if (typeof global !== "undefined") { - // global already exists - } else if (typeof window !== "undefined") { - window.global = window; - } else if (typeof self !== "undefined") { - self.global = self; - } else { - throw new Error("cannot export Go (neither global, window nor self is defined)"); - } - - if (!global.require && typeof require !== "undefined") { - global.require = require; - } - - if (!global.fs && global.require) { - global.fs = require("node:fs"); - } +"use strict"; +(() => { const enosys = () => { const err = new Error("not implemented"); err.code = "ENOSYS"; return err; }; - if (!global.fs) { + if (!globalThis.fs) { let outputBuf = ""; - global.fs = { - constants: { O_WRONLY: -1, O_RDWR: -1, O_CREAT: -1, O_TRUNC: -1, O_APPEND: -1, O_EXCL: -1 }, // unused + globalThis.fs = { + constants: { O_WRONLY: -1, O_RDWR: -1, O_CREAT: -1, O_TRUNC: -1, O_APPEND: -1, O_EXCL: -1, O_DIRECTORY: -1 }, // unused writeSync(fd, buf) { outputBuf += decoder.decode(buf); const nl = outputBuf.lastIndexOf("\n"); if (nl != -1) { - console.log(outputBuf.substr(0, nl)); - outputBuf = outputBuf.substr(nl + 1); + console.log(outputBuf.substring(0, nl)); + outputBuf = outputBuf.substring(nl + 1); } return buf.length; }, @@ -86,8 +58,8 @@ }; } - if (!global.process) { - global.process = { + if (!globalThis.process) { + globalThis.process = { getuid() { return -1; }, getgid() { return -1; }, geteuid() { return -1; }, @@ -101,53 +73,66 @@ } } - if (!global.crypto) { - const nodeCrypto = require("node:crypto"); - global.crypto = { - getRandomValues(b) { - nodeCrypto.randomFillSync(b); - }, - }; + if (!globalThis.path) { + globalThis.path = { + resolve(...pathSegments) { + return pathSegments.join("/"); + } + } } - if (!global.performance) { - global.performance = { - now() { - const [sec, nsec] = process.hrtime(); - return sec * 1000 + nsec / 1000000; - }, - }; + if 
(!globalThis.crypto) { + throw new Error("globalThis.crypto is not available, polyfill required (crypto.getRandomValues only)"); } - if (!global.TextEncoder) { - global.TextEncoder = require("node:util").TextEncoder; + if (!globalThis.performance) { + throw new Error("globalThis.performance is not available, polyfill required (performance.now only)"); } - if (!global.TextDecoder) { - global.TextDecoder = require("node:util").TextDecoder; + if (!globalThis.TextEncoder) { + throw new Error("globalThis.TextEncoder is not available, polyfill required"); } - // End of polyfills for common API. + if (!globalThis.TextDecoder) { + throw new Error("globalThis.TextDecoder is not available, polyfill required"); + } const encoder = new TextEncoder("utf-8"); const decoder = new TextDecoder("utf-8"); - let reinterpretBuf = new DataView(new ArrayBuffer(8)); - var logLine = []; - const wasmExit = {}; // thrown to exit via proc_exit (not an error) - global.Go = class { + globalThis.Go = class { constructor() { - this._callbackTimeouts = new Map(); + this.argv = ["js"]; + this.env = {}; + this.exit = (code) => { + if (code !== 0) { + console.warn("exit code:", code); + } + }; + this._exitPromise = new Promise((resolve) => { + this._resolveExitPromise = resolve; + }); + this._pendingEvent = null; + this._scheduledTimeouts = new Map(); this._nextCallbackTimeoutID = 1; - const mem = () => { - // The buffer may change when requesting more memory. 
- return new DataView(this._inst.exports.memory.buffer); + const setInt64 = (addr, v) => { + this.mem.setUint32(addr + 0, v, true); + this.mem.setUint32(addr + 4, Math.floor(v / 4294967296), true); } - const unboxValue = (v_ref) => { - reinterpretBuf.setBigInt64(0, v_ref, true); - const f = reinterpretBuf.getFloat64(0, true); + const setInt32 = (addr, v) => { + this.mem.setUint32(addr + 0, v, true); + } + + const getInt64 = (addr) => { + const low = this.mem.getUint32(addr + 0, true); + const high = this.mem.getInt32(addr + 4, true); + return low + high * 4294967296; + } + + const loadValue = (addr) => { + const f = this.mem.getFloat64(addr, true); if (f === 0) { return undefined; } @@ -155,77 +140,69 @@ return f; } - const id = v_ref & 0xffffffffn; + const id = this.mem.getUint32(addr, true); return this._values[id]; } + const storeValue = (addr, v) => { + const nanHead = 0x7FF80000; - const loadValue = (addr) => { - let v_ref = mem().getBigUint64(addr, true); - return unboxValue(v_ref); - } - - const boxValue = (v) => { - const nanHead = 0x7FF80000n; - - if (typeof v === "number") { + if (typeof v === "number" && v !== 0) { if (isNaN(v)) { - return nanHead << 32n; - } - if (v === 0) { - return (nanHead << 32n) | 1n; + this.mem.setUint32(addr + 4, nanHead, true); + this.mem.setUint32(addr, 0, true); + return; } - reinterpretBuf.setFloat64(0, v, true); - return reinterpretBuf.getBigInt64(0, true); + this.mem.setFloat64(addr, v, true); + return; } - switch (v) { - case undefined: - return 0n; - case null: - return (nanHead << 32n) | 2n; - case true: - return (nanHead << 32n) | 3n; - case false: - return (nanHead << 32n) | 4n; + if (v === undefined) { + this.mem.setFloat64(addr, 0, true); + return; } let id = this._ids.get(v); if (id === undefined) { id = this._idPool.pop(); if (id === undefined) { - id = BigInt(this._values.length); + id = this._values.length; } this._values[id] = v; this._goRefCounts[id] = 0; this._ids.set(v, id); } this._goRefCounts[id]++; - let 
typeFlag = 1n; + let typeFlag = 0; switch (typeof v) { + case "object": + if (v !== null) { + typeFlag = 1; + } + break; case "string": - typeFlag = 2n; + typeFlag = 2; break; case "symbol": - typeFlag = 3n; + typeFlag = 3; break; case "function": - typeFlag = 4n; + typeFlag = 4; break; } - return id | ((nanHead | typeFlag) << 32n); - } - - const storeValue = (addr, v) => { - let v_ref = boxValue(v); - mem().setBigUint64(addr, v_ref, true); + this.mem.setUint32(addr + 4, nanHead | typeFlag, true); + this.mem.setUint32(addr, id, true); } - const loadSlice = (array, len, cap) => { - return new Uint8Array(this._inst.exports.memory.buffer, array, len); + const loadSlice = (addr) => { + const array = getInt64(addr + 0); + const len = getInt64(addr + 8); + return new Uint8Array(this._inst.exports.mem.buffer, array, len); } - const loadSliceOfValues = (array, len, cap) => { + const loadSliceOfValues = (addr) => { + const array = getInt64(addr + 0); + const len = getInt64(addr + 8); const a = new Array(len); for (let i = 0; i < len; i++) { a[i] = loadValue(array + i * 8); @@ -233,287 +210,353 @@ return a; } - const loadString = (ptr, len) => { - return decoder.decode(new DataView(this._inst.exports.memory.buffer, ptr, len)); + const loadString = (addr) => { + const saddr = getInt64(addr + 0); + const len = getInt64(addr + 8); + return decoder.decode(new DataView(this._inst.exports.mem.buffer, saddr, len)); + } + + const testCallExport = (a, b) => { + this._inst.exports.testExport0(); + return this._inst.exports.testExport(a, b); } const timeOrigin = Date.now() - performance.now(); this.importObject = { - wasi_snapshot_preview1: { - // https://github.com/WebAssembly/WASI/blob/main/phases/snapshot/docs.md#fd_write - fd_write: function(fd, iovs_ptr, iovs_len, nwritten_ptr) { - let nwritten = 0; - if (fd == 1) { - for (let iovs_i=0; iovs_i 0, // dummy - fd_fdstat_get: () => 0, // dummy - fd_seek: () => 0, // dummy - proc_exit: (code) => { + _gotest: { + add: (a, b) => a + b, + 
callExport: testCallExport, + }, + gojs: { + // Go's SP does not change as long as no Go code is running. Some operations (e.g. calls, getters and setters) + // may synchronously trigger a Go event handler. This makes Go code get executed in the middle of the imported + // function. A goroutine can switch to a new stack if the current stack is too small (see morestack function). + // This changes the SP, thus we have to update the SP used by the imported function. + + // func wasmExit(code int32) + "runtime.wasmExit": (sp) => { + sp >>>= 0; + const code = this.mem.getInt32(sp + 8, true); this.exited = true; - this.exitCode = code; - this._resolveExitPromise(); - throw wasmExit; + delete this._inst; + delete this._values; + delete this._goRefCounts; + delete this._ids; + delete this._idPool; + this.exit(code); }, - random_get: (bufPtr, bufLen) => { - crypto.getRandomValues(loadSlice(bufPtr, bufLen)); - return 0; + + // func wasmWrite(fd uintptr, p unsafe.Pointer, n int32) + "runtime.wasmWrite": (sp) => { + sp >>>= 0; + const fd = getInt64(sp + 8); + const p = getInt64(sp + 16); + const n = this.mem.getInt32(sp + 24, true); + fs.writeSync(fd, new Uint8Array(this._inst.exports.mem.buffer, p, n)); }, - }, - gojs: { - // func ticks() float64 - "runtime.ticks": () => { - return timeOrigin + performance.now(); + + // func resetMemoryDataView() + "runtime.resetMemoryDataView": (sp) => { + sp >>>= 0; + this.mem = new DataView(this._inst.exports.mem.buffer); + }, + + // func nanotime1() int64 + "runtime.nanotime1": (sp) => { + sp >>>= 0; + setInt64(sp + 8, (timeOrigin + performance.now()) * 1000000); + }, + + // func walltime() (sec int64, nsec int32) + "runtime.walltime": (sp) => { + sp >>>= 0; + const msec = (new Date).getTime(); + setInt64(sp + 8, msec / 1000); + this.mem.setInt32(sp + 16, (msec % 1000) * 1000000, true); }, - // func sleepTicks(timeout float64) - "runtime.sleepTicks": (timeout) => { - // Do not sleep, only reactivate scheduler after the given timeout. 
- setTimeout(() => { - if (this.exited) return; - try { - this._inst.exports.go_scheduler(); - } catch (e) { - if (e !== wasmExit) throw e; - } - }, timeout); + // func scheduleTimeoutEvent(delay int64) int32 + "runtime.scheduleTimeoutEvent": (sp) => { + sp >>>= 0; + const id = this._nextCallbackTimeoutID; + this._nextCallbackTimeoutID++; + this._scheduledTimeouts.set(id, setTimeout( + () => { + this._resume(); + while (this._scheduledTimeouts.has(id)) { + // for some reason Go failed to register the timeout event, log and try again + // (temporary workaround for https://github.com/golang/go/issues/28975) + console.warn("scheduleTimeoutEvent: missed timeout event"); + this._resume(); + } + }, + getInt64(sp + 8), + )); + this.mem.setInt32(sp + 16, id, true); + }, + + // func clearTimeoutEvent(id int32) + "runtime.clearTimeoutEvent": (sp) => { + sp >>>= 0; + const id = this.mem.getInt32(sp + 8, true); + clearTimeout(this._scheduledTimeouts.get(id)); + this._scheduledTimeouts.delete(id); + }, + + // func getRandomData(r []byte) + "runtime.getRandomData": (sp) => { + sp >>>= 0; + crypto.getRandomValues(loadSlice(sp + 8)); }, // func finalizeRef(v ref) - "syscall/js.finalizeRef": (v_ref) => { - // Note: TinyGo does not support finalizers so this is only called - // for one specific case, by js.go:jsString. and can/might leak memory. 
- const id = v_ref & 0xffffffffn; - if (this._goRefCounts?.[id] !== undefined) { - this._goRefCounts[id]--; - if (this._goRefCounts[id] === 0) { - const v = this._values[id]; - this._values[id] = null; - this._ids.delete(v); - this._idPool.push(id); - } - } else { - console.error("syscall/js.finalizeRef: unknown id", id); + "syscall/js.finalizeRef": (sp) => { + sp >>>= 0; + const id = this.mem.getUint32(sp + 8, true); + this._goRefCounts[id]--; + if (this._goRefCounts[id] === 0) { + const v = this._values[id]; + this._values[id] = null; + this._ids.delete(v); + this._idPool.push(id); } }, // func stringVal(value string) ref - "syscall/js.stringVal": (value_ptr, value_len) => { - value_ptr >>>= 0; - const s = loadString(value_ptr, value_len); - return boxValue(s); + "syscall/js.stringVal": (sp) => { + sp >>>= 0; + storeValue(sp + 24, loadString(sp + 8)); }, // func valueGet(v ref, p string) ref - "syscall/js.valueGet": (v_ref, p_ptr, p_len) => { - let prop = loadString(p_ptr, p_len); - let v = unboxValue(v_ref); - let result = Reflect.get(v, prop); - return boxValue(result); + "syscall/js.valueGet": (sp) => { + sp >>>= 0; + const result = Reflect.get(loadValue(sp + 8), loadString(sp + 16)); + sp = this._inst.exports.getsp() >>> 0; // see comment above + storeValue(sp + 32, result); }, // func valueSet(v ref, p string, x ref) - "syscall/js.valueSet": (v_ref, p_ptr, p_len, x_ref) => { - const v = unboxValue(v_ref); - const p = loadString(p_ptr, p_len); - const x = unboxValue(x_ref); - Reflect.set(v, p, x); + "syscall/js.valueSet": (sp) => { + sp >>>= 0; + Reflect.set(loadValue(sp + 8), loadString(sp + 16), loadValue(sp + 32)); }, // func valueDelete(v ref, p string) - "syscall/js.valueDelete": (v_ref, p_ptr, p_len) => { - const v = unboxValue(v_ref); - const p = loadString(p_ptr, p_len); - Reflect.deleteProperty(v, p); + "syscall/js.valueDelete": (sp) => { + sp >>>= 0; + Reflect.deleteProperty(loadValue(sp + 8), loadString(sp + 16)); }, // func valueIndex(v ref, i 
int) ref - "syscall/js.valueIndex": (v_ref, i) => { - return boxValue(Reflect.get(unboxValue(v_ref), i)); + "syscall/js.valueIndex": (sp) => { + sp >>>= 0; + storeValue(sp + 24, Reflect.get(loadValue(sp + 8), getInt64(sp + 16))); }, // valueSetIndex(v ref, i int, x ref) - "syscall/js.valueSetIndex": (v_ref, i, x_ref) => { - Reflect.set(unboxValue(v_ref), i, unboxValue(x_ref)); + "syscall/js.valueSetIndex": (sp) => { + sp >>>= 0; + Reflect.set(loadValue(sp + 8), getInt64(sp + 16), loadValue(sp + 24)); }, // func valueCall(v ref, m string, args []ref) (ref, bool) - "syscall/js.valueCall": (ret_addr, v_ref, m_ptr, m_len, args_ptr, args_len, args_cap) => { - const v = unboxValue(v_ref); - const name = loadString(m_ptr, m_len); - const args = loadSliceOfValues(args_ptr, args_len, args_cap); + "syscall/js.valueCall": (sp) => { + sp >>>= 0; try { - const m = Reflect.get(v, name); - storeValue(ret_addr, Reflect.apply(m, v, args)); - mem().setUint8(ret_addr + 8, 1); + const v = loadValue(sp + 8); + const m = Reflect.get(v, loadString(sp + 16)); + const args = loadSliceOfValues(sp + 32); + const result = Reflect.apply(m, v, args); + sp = this._inst.exports.getsp() >>> 0; // see comment above + storeValue(sp + 56, result); + this.mem.setUint8(sp + 64, 1); } catch (err) { - storeValue(ret_addr, err); - mem().setUint8(ret_addr + 8, 0); + sp = this._inst.exports.getsp() >>> 0; // see comment above + storeValue(sp + 56, err); + this.mem.setUint8(sp + 64, 0); } }, // func valueInvoke(v ref, args []ref) (ref, bool) - "syscall/js.valueInvoke": (ret_addr, v_ref, args_ptr, args_len, args_cap) => { + "syscall/js.valueInvoke": (sp) => { + sp >>>= 0; try { - const v = unboxValue(v_ref); - const args = loadSliceOfValues(args_ptr, args_len, args_cap); - storeValue(ret_addr, Reflect.apply(v, undefined, args)); - mem().setUint8(ret_addr + 8, 1); + const v = loadValue(sp + 8); + const args = loadSliceOfValues(sp + 16); + const result = Reflect.apply(v, undefined, args); + sp = 
this._inst.exports.getsp() >>> 0; // see comment above + storeValue(sp + 40, result); + this.mem.setUint8(sp + 48, 1); } catch (err) { - storeValue(ret_addr, err); - mem().setUint8(ret_addr + 8, 0); + sp = this._inst.exports.getsp() >>> 0; // see comment above + storeValue(sp + 40, err); + this.mem.setUint8(sp + 48, 0); } }, // func valueNew(v ref, args []ref) (ref, bool) - "syscall/js.valueNew": (ret_addr, v_ref, args_ptr, args_len, args_cap) => { - const v = unboxValue(v_ref); - const args = loadSliceOfValues(args_ptr, args_len, args_cap); + "syscall/js.valueNew": (sp) => { + sp >>>= 0; try { - storeValue(ret_addr, Reflect.construct(v, args)); - mem().setUint8(ret_addr + 8, 1); + const v = loadValue(sp + 8); + const args = loadSliceOfValues(sp + 16); + const result = Reflect.construct(v, args); + sp = this._inst.exports.getsp() >>> 0; // see comment above + storeValue(sp + 40, result); + this.mem.setUint8(sp + 48, 1); } catch (err) { - storeValue(ret_addr, err); - mem().setUint8(ret_addr+ 8, 0); + sp = this._inst.exports.getsp() >>> 0; // see comment above + storeValue(sp + 40, err); + this.mem.setUint8(sp + 48, 0); } }, // func valueLength(v ref) int - "syscall/js.valueLength": (v_ref) => { - return unboxValue(v_ref).length; + "syscall/js.valueLength": (sp) => { + sp >>>= 0; + setInt64(sp + 16, parseInt(loadValue(sp + 8).length)); }, // valuePrepareString(v ref) (ref, int) - "syscall/js.valuePrepareString": (ret_addr, v_ref) => { - const s = String(unboxValue(v_ref)); - const str = encoder.encode(s); - storeValue(ret_addr, str); - mem().setInt32(ret_addr + 8, str.length, true); + "syscall/js.valuePrepareString": (sp) => { + sp >>>= 0; + const str = encoder.encode(String(loadValue(sp + 8))); + storeValue(sp + 16, str); + setInt64(sp + 24, str.length); }, // valueLoadString(v ref, b []byte) - "syscall/js.valueLoadString": (v_ref, slice_ptr, slice_len, slice_cap) => { - const str = unboxValue(v_ref); - loadSlice(slice_ptr, slice_len, slice_cap).set(str); + 
"syscall/js.valueLoadString": (sp) => { + sp >>>= 0; + const str = loadValue(sp + 8); + loadSlice(sp + 16).set(str); }, // func valueInstanceOf(v ref, t ref) bool - "syscall/js.valueInstanceOf": (v_ref, t_ref) => { - return unboxValue(v_ref) instanceof unboxValue(t_ref); + "syscall/js.valueInstanceOf": (sp) => { + sp >>>= 0; + this.mem.setUint8(sp + 24, (loadValue(sp + 8) instanceof loadValue(sp + 16)) ? 1 : 0); }, // func copyBytesToGo(dst []byte, src ref) (int, bool) - "syscall/js.copyBytesToGo": (ret_addr, dest_addr, dest_len, dest_cap, src_ref) => { - let num_bytes_copied_addr = ret_addr; - let returned_status_addr = ret_addr + 4; // Address of returned boolean status variable - - const dst = loadSlice(dest_addr, dest_len); - const src = unboxValue(src_ref); + "syscall/js.copyBytesToGo": (sp) => { + sp >>>= 0; + const dst = loadSlice(sp + 8); + const src = loadValue(sp + 32); if (!(src instanceof Uint8Array || src instanceof Uint8ClampedArray)) { - mem().setUint8(returned_status_addr, 0); // Return "not ok" status + this.mem.setUint8(sp + 48, 0); return; } const toCopy = src.subarray(0, dst.length); dst.set(toCopy); - mem().setUint32(num_bytes_copied_addr, toCopy.length, true); - mem().setUint8(returned_status_addr, 1); // Return "ok" status + setInt64(sp + 40, toCopy.length); + this.mem.setUint8(sp + 48, 1); }, - // copyBytesToJS(dst ref, src []byte) (int, bool) - // Originally copied from upstream Go project, then modified: - // https://github.com/golang/go/blob/3f995c3f3b43033013013e6c7ccc93a9b1411ca9/misc/wasm/wasm_exec.js#L404-L416 - "syscall/js.copyBytesToJS": (ret_addr, dst_ref, src_addr, src_len, src_cap) => { - let num_bytes_copied_addr = ret_addr; - let returned_status_addr = ret_addr + 4; // Address of returned boolean status variable - - const dst = unboxValue(dst_ref); - const src = loadSlice(src_addr, src_len); + // func copyBytesToJS(dst ref, src []byte) (int, bool) + "syscall/js.copyBytesToJS": (sp) => { + sp >>>= 0; + const dst = loadValue(sp + 
8); + const src = loadSlice(sp + 16); if (!(dst instanceof Uint8Array || dst instanceof Uint8ClampedArray)) { - mem().setUint8(returned_status_addr, 0); // Return "not ok" status + this.mem.setUint8(sp + 48, 0); return; } const toCopy = src.subarray(0, dst.length); dst.set(toCopy); - mem().setUint32(num_bytes_copied_addr, toCopy.length, true); - mem().setUint8(returned_status_addr, 1); // Return "ok" status + setInt64(sp + 40, toCopy.length); + this.mem.setUint8(sp + 48, 1); + }, + + "debug": (value) => { + console.log(value); }, } }; - - // Go 1.20 uses 'env'. Go 1.21 uses 'gojs'. - // For compatibility, we use both as long as Go 1.20 is supported. - this.importObject.env = this.importObject.gojs; } async run(instance) { + if (!(instance instanceof WebAssembly.Instance)) { + throw new Error("Go.run: WebAssembly.Instance expected"); + } this._inst = instance; + this.mem = new DataView(this._inst.exports.mem.buffer); this._values = [ // JS values that Go currently has references to, indexed by reference id NaN, 0, null, true, false, - global, + globalThis, this, ]; - this._goRefCounts = []; // number of references that Go has to a JS value, indexed by reference id - this._ids = new Map(); // mapping from JS values to reference ids - this._idPool = []; // unused ids that have been garbage collected - this.exited = false; // whether the Go program has exited - this.exitCode = 0; - - if (this._inst.exports._start) { - let exitPromise = new Promise((resolve, reject) => { - this._resolveExitPromise = resolve; - }); - - // Run program, but catch the wasmExit exception that's thrown - // to return back here. 
- try { - this._inst.exports._start(); - } catch (e) { - if (e !== wasmExit) throw e; + this._goRefCounts = new Array(this._values.length).fill(Infinity); // number of references that Go has to a JS value, indexed by reference id + this._ids = new Map([ // mapping from JS values to reference ids + [0, 1], + [null, 2], + [true, 3], + [false, 4], + [globalThis, 5], + [this, 6], + ]); + this._idPool = []; // unused ids that have been garbage collected + this.exited = false; // whether the Go program has exited + + // Pass command line arguments and environment variables to WebAssembly by writing them to the linear memory. + let offset = 4096; + + const strPtr = (str) => { + const ptr = offset; + const bytes = encoder.encode(str + "\0"); + new Uint8Array(this.mem.buffer, offset, bytes.length).set(bytes); + offset += bytes.length; + if (offset % 8 !== 0) { + offset += 8 - (offset % 8); } + return ptr; + }; + + const argc = this.argv.length; + + const argvPtrs = []; + this.argv.forEach((arg) => { + argvPtrs.push(strPtr(arg)); + }); + argvPtrs.push(0); + + const keys = Object.keys(this.env).sort(); + keys.forEach((key) => { + argvPtrs.push(strPtr(`${key}=${this.env[key]}`)); + }); + argvPtrs.push(0); + + const argv = offset; + argvPtrs.forEach((ptr) => { + this.mem.setUint32(offset, ptr, true); + this.mem.setUint32(offset + 4, 0, true); + offset += 8; + }); + + // The linker guarantees global data starts from at least wasmMinDataAddr. + // Keep in sync with cmd/link/internal/ld/data.go:wasmMinDataAddr. 
+ const wasmMinDataAddr = 4096 + 8192; + if (offset >= wasmMinDataAddr) { + throw new Error("total length of command line and environment variables exceeds limit"); + } - await exitPromise; - return this.exitCode; - } else { - this._inst.exports._initialize(); + this._inst.exports.run(argc, argv); + if (this.exited) { + this._resolveExitPromise(); } + await this._exitPromise; } _resume() { if (this.exited) { throw new Error("Go program has already exited"); } - try { - this._inst.exports.resume(); - } catch (e) { - if (e !== wasmExit) throw e; - } + this._inst.exports.resume(); if (this.exited) { this._resolveExitPromise(); } diff --git a/lib/format.go b/lib/format.go index 62d1087..ea0a9c1 100644 --- a/lib/format.go +++ b/lib/format.go @@ -2,7 +2,6 @@ package lib import ( "bytes" - "encoding/json" "fmt" "log" "os" @@ -121,14 +120,6 @@ func extractDirectiveContent(n *ExtendedNode, flagCount int) (string, bool) { return parts[1], true } -// marshalJSONArray formats a string slice as a JSON array with spaces after commas. -func marshalJSONArray(items []string) (string, error) { - b, err := Marshal(items) - if err != nil { - return "", err - } - return strings.ReplaceAll(string(b), "\",\"", "\", \""), nil -} var nodeFormatters map[string]func(*ExtendedNode, *Config) string @@ -429,13 +420,8 @@ func formatRun(n *ExtendedNode, c *Config) string { content, _ = extractDirectiveContent(n, len(flags)) } - var jsonItems []string - if json.Unmarshal([]byte(content), &jsonItems) == nil { - outStr, err := marshalJSONArray(jsonItems) - if err != nil { - panic(err) - } - content = outStr + "\n" + if jsonItems, ok := unmarshalJSONStringArray(content); ok { + content = marshalJSONStringArray(jsonItems) + "\n" } else { content = formatShell(content, hereDoc, c) if hereDoc { @@ -475,21 +461,6 @@ func formatBasic(n *ExtendedNode, c *Config) string { return IndentFollowingLines(n.directive()+" "+value, c.IndentSize) } -// Marshal is a UTF-8 friendly marshaler. 
Go's json.Marshal is not UTF-8 -// friendly because it replaces the valid UTF-8 and JSON characters "&". "<", -// ">" with the "slash u" unicode escaped forms (e.g. \u0026). It preemptively -// escapes for HTML friendliness. Where text may include any of these -// characters, json.Marshal should not be used. Playground of Go breaking a -// title: https://play.golang.org/p/o2hiX0c62oN -// Source: https://stackoverflow.com/a/69502657/5684541 -func Marshal(i interface{}) ([]byte, error) { - buffer := &bytes.Buffer{} - encoder := json.NewEncoder(buffer) - encoder.SetEscapeHTML(false) - err := encoder.Encode(i) - return bytes.TrimRight(buffer.Bytes(), "\n"), err -} - func getCmd(n *ExtendedNode, shouldSplitNode bool) []string { cmd := []string{} for node := n; node != nil; node = node.Next { @@ -521,17 +492,13 @@ func formatCmd(n *ExtendedNode, c *Config) string { } // If JSON form (attribute or decodable), format as JSON array with spaces - var jsonItems []string - if isJSON || json.Unmarshal([]byte(content), &jsonItems) == nil { + jsonItems, jsonOK := unmarshalJSONStringArray(content) + if isJSON || jsonOK { items := getCmd(n.Next, false) if !isJSON && len(items) == 0 { items = jsonItems } - outStr, err := marshalJSONArray(items) - if err != nil { - return "" - } - return n.directive() + " " + outStr + "\n" + return n.directive() + " " + marshalJSONStringArray(items) + "\n" } // Otherwise, format as shell command diff --git a/lib/format_test.go b/lib/format_test.go index c98a555..e1ffbeb 100644 --- a/lib/format_test.go +++ b/lib/format_test.go @@ -96,26 +96,58 @@ func TestIndentFollowingLines(t *testing.T) { } } -// --- Marshal --- +// --- marshalJSONStringArray --- -func TestMarshal(t *testing.T) { +func TestMarshalJSONStringArray(t *testing.T) { tests := []struct { name string - input interface{} + input []string expected string }{ - {"string slice", []string{"a", "b"}, `["a","b"]`}, + {"string slice", []string{"a", "b"}, `["a", "b"]`}, {"angle brackets not 
escaped", []string{""}, `[""]`}, {"ampersand not escaped", []string{"a&b"}, `["a&b"]`}, {"empty slice", []string{}, `[]`}, {"single item", []string{"hello"}, `["hello"]`}, + {"with quotes", []string{`say "hi"`}, `["say \"hi\""]`}, + {"with backslash", []string{`a\b`}, `["a\\b"]`}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result, err := Marshal(tt.input) - require.NoError(t, err) - assert.Equal(t, tt.expected, string(result)) + result := marshalJSONStringArray(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +// --- unmarshalJSONStringArray --- + +func TestUnmarshalJSONStringArray(t *testing.T) { + tests := []struct { + name string + input string + expected []string + ok bool + }{ + {"simple array", `["a", "b"]`, []string{"a", "b"}, true}, + {"no spaces", `["a","b"]`, []string{"a", "b"}, true}, + {"empty array", `[]`, []string{}, true}, + {"single item", `["hello"]`, []string{"hello"}, true}, + {"with escapes", `["say \"hi\""]`, []string{`say "hi"`}, true}, + {"not json", `echo hello`, nil, false}, + {"not array", `"hello"`, nil, false}, + {"mixed types", `["a", 1]`, nil, false}, + {"nested array", `[["a"]]`, nil, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, ok := unmarshalJSONStringArray(tt.input) + assert.Equal(t, tt.ok, ok) + if ok { + assert.Equal(t, tt.expected, result) + } }) } } diff --git a/lib/json.go b/lib/json.go new file mode 100644 index 0000000..eca5471 --- /dev/null +++ b/lib/json.go @@ -0,0 +1,156 @@ +// Since our JSON needs are limited to []string arrays (Dockerfile JSON-form +// directives like CMD ["ls", "-la"]), we implement just that here. +// +// This also preserves the non-HTML-escaping behavior: Go's json.Marshal +// escapes <, >, & as \uXXXX for HTML safety, which we don't want. +package lib + +import ( + "fmt" + "strconv" + "strings" +) + +// unmarshalJSONStringArray parses a JSON array of strings. 
// Returns the parsed strings and true on success, or nil and false if
// the input is not a valid JSON array of strings. An empty array yields a
// non-nil, zero-length slice. Surrounding whitespace is ignored. A trailing
// comma such as ["a",] is rejected, as the JSON grammar requires.
func unmarshalJSONStringArray(data string) ([]string, bool) {
	s := strings.TrimSpace(data)
	if len(s) < 2 || s[0] != '[' || s[len(s)-1] != ']' {
		return nil, false
	}
	inner := strings.TrimSpace(s[1 : len(s)-1])
	if inner == "" {
		return []string{}, true
	}

	var result []string
	pos := 0
	for {
		// Each iteration must consume exactly one element. After a comma this
		// makes end-of-input an error (parseJSONString fails when pos is past
		// the end), which is what rejects a trailing comma.
		pos = skipWhitespace(inner, pos)
		str, next, ok := parseJSONString(inner, pos)
		if !ok {
			return nil, false
		}
		result = append(result, str)

		pos = skipWhitespace(inner, next)
		if pos >= len(inner) {
			return result, true
		}
		if inner[pos] != ',' {
			return nil, false
		}
		pos++
	}
}

// skipWhitespace returns the index of the first byte at or after pos that is
// not JSON whitespace (space, tab, line feed, carriage return). It returns
// len(s) when only whitespace remains.
func skipWhitespace(s string, pos int) int {
	for pos < len(s) {
		switch s[pos] {
		case ' ', '\t', '\n', '\r':
			pos++
		default:
			return pos
		}
	}
	return pos
}

// parseJSONString parses a JSON string starting at pos.
// Returns the unescaped string, the position after the closing quote, and success.
//
// s[pos] must be the opening double quote. Parsing fails on an unterminated
// string, an unknown or truncated escape sequence, or an unescaped control
// character (< 0x20). A \uXXXX escape holding a UTF-16 high surrogate that is
// immediately followed by a \uXXXX low surrogate is decoded as one code
// point; an unpaired surrogate is replaced by U+FFFD.
func parseJSONString(s string, pos int) (string, int, bool) {
	if pos >= len(s) || s[pos] != '"' {
		return "", pos, false
	}
	pos++
	var b strings.Builder
	for pos < len(s) {
		ch := s[pos]
		if ch == '"' {
			return b.String(), pos + 1, true
		}
		if ch == '\\' {
			pos++
			if pos >= len(s) {
				return "", pos, false
			}
			switch s[pos] {
			case '"', '\\', '/':
				b.WriteByte(s[pos])
			case 'n':
				b.WriteByte('\n')
			case 'r':
				b.WriteByte('\r')
			case 't':
				b.WriteByte('\t')
			case 'b':
				b.WriteByte('\b')
			case 'f':
				b.WriteByte('\f')
			case 'u':
				r, ok := parseJSONHex4(s, pos)
				if !ok {
					return "", pos, false
				}
				pos += 4 // pos now sits on the last hex digit of this escape
				if r >= 0xD800 && r <= 0xDFFF {
					// Surrogate code unit: valid only as a high/low pair.
					decoded := rune('\uFFFD')
					if r <= 0xDBFF && pos+2 < len(s) && s[pos+1] == '\\' && s[pos+2] == 'u' {
						if lo, ok := parseJSONHex4(s, pos+2); ok && lo >= 0xDC00 && lo <= 0xDFFF {
							decoded = 0x10000 + (r-0xD800)<<10 + (lo - 0xDC00)
							pos += 6 // consume the second \uXXXX as well
						}
					}
					r = decoded
				}
				b.WriteRune(r)
			default:
				return "", pos, false
			}
		} else if ch < 0x20 {
			return "", pos, false
		} else {
			// Raw bytes (including multi-byte UTF-8 sequences) are copied as is.
			b.WriteByte(ch)
		}
		pos++
	}
	// Ran off the end without seeing the closing quote.
	return "", pos, false
}

// parseJSONHex4 decodes the four hexadecimal digits of a \uXXXX escape.
// s[pos] must be the 'u'; the digits are s[pos+1:pos+5]. It reports false
// when fewer than four bytes follow or they are not all hex digits.
func parseJSONHex4(s string, pos int) (rune, bool) {
	if pos+4 >= len(s) {
		return 0, false
	}
	val, err := strconv.ParseUint(s[pos+1:pos+5], 16, 16)
	if err != nil {
		return 0, false
	}
	return rune(val), true
}

// marshalJSONStringArray formats a string slice as a JSON array with spaces
// after commas: ["a", "b"]. Unlike encoding/json, it does not escape HTML
// characters (<, >, &). A nil or empty slice is rendered as [].
func marshalJSONStringArray(items []string) string {
	var b strings.Builder
	b.WriteByte('[')
	for i, item := range items {
		if i > 0 {
			b.WriteString(", ")
		}
		writeJSONString(&b, item)
	}
	b.WriteByte(']')
	return b.String()
}

// writeJSONString writes a JSON-escaped string (including surrounding quotes) to b.
// Quotes, backslashes and control characters are escaped; every other rune is
// written verbatim as UTF-8.
func writeJSONString(b *strings.Builder, s string) {
	b.WriteByte('"')
	for _, r := range s {
		switch r {
		case '"':
			b.WriteString(`\"`)
		case '\\':
			b.WriteString(`\\`)
		case '\n':
			b.WriteString(`\n`)
		case '\r':
			b.WriteString(`\r`)
		case '\t':
			b.WriteString(`\t`)
		case '\b':
			b.WriteString(`\b`)
		case '\f':
			b.WriteString(`\f`)
		default:
			if r < 0x20 {
				// Remaining control characters have no short escape form.
				fmt.Fprintf(b, `\u%04x`, r)
			} else {
				b.WriteRune(r)
			}
		}
	}
	b.WriteByte('"')
}