From 2869e510c185220268fb9d1b8ade2e8dfd9d5f99 Mon Sep 17 00:00:00 2001 From: wujunchen Date: Sun, 21 Jun 2026 19:44:41 +0800 Subject: [PATCH] fix(opencode): strip all leading BOMs in Bom.split (#33092) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bom.split() removed only the first U+FEFF via text.slice(1), so content with multiple consecutive leading BOMs kept the extras. Because join() re-runs split() before re-adding a single BOM, each write/edit cycle dropped at most one extra BOM, so a file with N leading BOMs needed several edit passes to normalize to one. Align split() with the existing splitBom() in packages/core/src/file-mutation.ts and strip all leading BOMs via /^\uFEFF+/. join() is unchanged; multi-BOM content now normalizes to exactly one (or zero) BOM in a single pass, fixing the write/edit/apply_patch callers transitively. Add unit tests for split()/join() (the util had none) covering no-BOM, single, multiple consecutive, and non-leading BOMs. 
Fixes #33092 Claude-Session: https://claude.ai/code/session_01444Qm3LDDrcHSy1HtETM6G --- packages/opencode/src/util/bom.ts | 4 +-- packages/opencode/test/util/bom.test.ts | 44 +++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 packages/opencode/test/util/bom.test.ts diff --git a/packages/opencode/src/util/bom.ts b/packages/opencode/src/util/bom.ts index f015651e97fd..16b578eeccaf 100644 --- a/packages/opencode/src/util/bom.ts +++ b/packages/opencode/src/util/bom.ts @@ -5,8 +5,8 @@ const BOM_CODE = 0xfeff const BOM = String.fromCharCode(BOM_CODE) export function split(text: string) { - if (text.charCodeAt(0) !== BOM_CODE) return { bom: false, text } - return { bom: true, text: text.slice(1) } + const stripped = text.replace(/^\uFEFF+/, "") + return { bom: stripped.length !== text.length, text: stripped } } export function join(text: string, bom: boolean) { diff --git a/packages/opencode/test/util/bom.test.ts b/packages/opencode/test/util/bom.test.ts new file mode 100644 index 000000000000..7fffb6beb19f --- /dev/null +++ b/packages/opencode/test/util/bom.test.ts @@ -0,0 +1,44 @@ +import { describe, expect, test } from "bun:test" +import * as Bom from "../../src/util/bom" + +const BOM = "\uFEFF" + +describe("Bom.split", () => { + test("returns text unchanged when there is no BOM", () => { + expect(Bom.split("hello")).toEqual({ bom: false, text: "hello" }) + }) + + test("strips a single leading BOM", () => { + expect(Bom.split(BOM + "hello")).toEqual({ bom: true, text: "hello" }) + }) + + test("strips all consecutive leading BOMs", () => { + expect(Bom.split(BOM.repeat(4) + "hello")).toEqual({ bom: true, text: "hello" }) + }) + + test("does not strip a BOM that is not at the start", () => { + expect(Bom.split("hello" + BOM)).toEqual({ bom: false, text: "hello" + BOM }) + }) + + test("handles an empty string", () => { + expect(Bom.split("")).toEqual({ bom: false, text: "" }) + }) +}) + +describe("Bom.join", () => { + 
test("adds a BOM when requested", () => { + expect(Bom.join("hello", true)).toBe(BOM + "hello") + }) + + test("omits the BOM when not requested", () => { + expect(Bom.join("hello", false)).toBe("hello") + }) + + test("normalizes multiple existing leading BOMs to exactly one", () => { + expect(Bom.join(BOM.repeat(4) + "hello", true)).toBe(BOM + "hello") + }) + + test("strips all existing leading BOMs when none is requested", () => { + expect(Bom.join(BOM.repeat(3) + "hello", false)).toBe("hello") + }) +})