From 29dab0f29fda6bc066821499d64e211fbe002fcf Mon Sep 17 00:00:00 2001 From: Jan Guth Date: Tue, 7 Apr 2026 17:59:51 +0200 Subject: [PATCH 01/10] feat(env): format-preserving byte-level YAML splice fast path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up from PR #126 review: every successful merge sync produced a noisy git diff because spliceYAMLStructural round-tripped the whole document through yaml.Marshal, which would normalize whitespace, quoting, and field ordering even for keys the splice didn't touch. New byte-level fast path (spliceYAMLByteLevel): 1. Parse local for yaml.Node Line/Column metadata. 2. Compute byte ranges for every top-level key. 3. Walk local byte-by-byte: - out-of-scope key range → copy verbatim - in-scope key, in merged → emit yaml.Marshal of just that one key:value pair - in-scope key, not merged → drop (deletion) 4. Append additions (merged keys not in local) at the end. The output preserves whitespace, quoting, and comments BYTE-FOR-BYTE on every byte that wasn't touched. Only the in-scope keys go through the yaml.v3 emitter. Dispatch order in spliceYAML is now: 1. Byte-level splice (best fidelity) 2. Structural splice (existing fallback for shapes byte-level can't handle — empty doc, non-mapping root, exotic Line metadata) 3. Text splice (conflict path, unchanged) New helper marshalSingleTopLevelKey serializes one key:value pair as a top-level YAML mapping entry. Used for both substitutions and appends in the byte-level path. New regression test TestSpliceYAMLByteLevel_PreservesOutOfScopeBytesVerbatim uses a local file with non-default formatting (4-space indent, double-quoted keys, inline comments) to prove that none of those choices get normalized away when an unrelated top-level key is spliced. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/env/splice.go | 229 ++++++++++++++++++++++++++++++++++++++--- pkg/env/splice_test.go | 62 +++++++++++ 2 files changed, 279 insertions(+), 12 deletions(-) diff --git a/pkg/env/splice.go b/pkg/env/splice.go index 029d55e..cb422b7 100644 --- a/pkg/env/splice.go +++ b/pkg/env/splice.go @@ -111,30 +111,235 @@ func containsConflictMarkers(b []byte) bool { bytes.Contains(b, []byte(">>>>>>> ")) } -// spliceYAML dispatches between structural and text splice based on -// whether `merged` is parseable YAML. +// spliceYAML dispatches between three splice strategies, in order of +// fidelity: +// +// 1. Byte-level splice (best fidelity): when `merged` is valid YAML +// AND the local file parses cleanly into a top-level mapping, copy +// out-of-scope byte ranges from `local` verbatim and only re-emit +// the in-scope keys' values via yaml.Marshal. This preserves +// whitespace, quoting style, and comment formatting on every +// unchanged byte. Out-of-scope content is byte-identical to the +// input — no spurious git-diff churn from yaml.v3 emitter +// normalization. +// +// 2. Structural splice (fallback for YAML quirks): when the byte-level +// path can't be used (rare — e.g. an unrecognised local-file +// shape), fall back to the Node-tree merge that round-trips the +// whole document through yaml.Marshal. Out-of-scope keys keep +// their content but lose exact whitespace. +// +// 3. Text splice (conflict path): when `merged` contains +// `git merge-file` conflict markers, it doesn't parse as YAML at +// all. Find the byte range of each scoped top-level key in `local` +// using yaml.v3 Node Line/Column metadata and substitute the +// conflicted text. Bytes outside the scoped ranges are preserved +// verbatim. +// +// (1) was added as the format-preserving emitter follow-up to PR #126. +// Before it, every successful merge sync produced a noisy git diff +// because yaml.Marshal would re-emit the entire document with its own +// preferred whitespace and quoting style — even for keys the splice +// didn't touch. func spliceYAML(local, merged []byte, selectors []string) ([]byte, error) { scope := topLevelKeysFromSelectors(selectors) - // Fast path: merged is valid YAML. Do a structural splice that - // preserves the most layout information (comments on non-replaced - // keys). + // Path 1: byte-level splice. Requires valid YAML on both sides + // (no conflict markers in `merged`) and a parseable + // top-level-mapping `local`. if !containsConflictMarkers(merged) { - out, err := spliceYAMLStructural(local, merged, scope) + out, err := spliceYAMLByteLevel(local, merged, scope) if err == nil { return out, nil } - // If structural splicing fails for any reason, fall through to - // text splicing as a defensive fallback. + // Path 2: structural splice. Same parseability requirements + // but rebuilds the whole document via yaml.Marshal. Used when + // the byte-level splice can't handle the local shape (e.g. + // flow-style mapping where Line metadata isn't reliable). + out, err = spliceYAMLStructural(local, merged, scope) + if err == nil { + return out, nil + } + // Both structural paths failed. Fall through to the text + // splice, which can recover from more weirdness. } - // Conflict path: find the byte ranges of the in-scope top-level keys - // in `local` and replace them with the merged text. This preserves - // out-of-scope content (envs:, profiles:, comments) byte-for-byte - // even when the merged output contains conflict markers. + // Path 3: text/conflict path. return spliceYAMLText(local, merged, scope) } +// spliceYAMLByteLevel is the format-preserving fast path. It walks +// `local` byte-by-byte, copies out-of-scope ranges verbatim, and +// substitutes in-scope ranges with the marshaled value subtree from +// `merged`. The output preserves whitespace, quoting style, and +// comments on every byte that wasn't touched. +// +// Returns an error (instead of falling through to a slower path) when +// the local YAML can't be parsed or has a non-mapping root. The +// caller is expected to fall back to spliceYAMLStructural / spliceYAMLText. +func spliceYAMLByteLevel(local, merged []byte, scope map[string]bool) ([]byte, error) { + // Parse local — we need yaml.v3 Node metadata to find byte ranges. + var localDoc yaml.Node + if err := yaml.Unmarshal(local, &localDoc); err != nil { + return nil, fmt.Errorf("byte splice: parse local: %w", err) + } + // Empty document content: header-only file or whitespace. Defer + // to the structural-path empty-doc handling rather than + // duplicating it. + if localDoc.Kind == 0 || len(localDoc.Content) == 0 { + return nil, fmt.Errorf("byte splice: empty local document, defer to structural") + } + if localDoc.Kind != yaml.DocumentNode { + return nil, fmt.Errorf("byte splice: unexpected local YAML structure") + } + localRoot := localDoc.Content[0] + if localRoot.Kind != yaml.MappingNode { + return nil, fmt.Errorf("byte splice: local root is not a mapping (kind %d)", localRoot.Kind) + } + + // Parse merged for the in-scope key values we're substituting in. + var mergedDoc yaml.Node + if err := yaml.Unmarshal(merged, &mergedDoc); err != nil { + return nil, fmt.Errorf("byte splice: parse merged: %w", err) + } + if mergedDoc.Kind != yaml.DocumentNode || len(mergedDoc.Content) == 0 { + return nil, fmt.Errorf("byte splice: merged has no content") + } + mergedRoot := mergedDoc.Content[0] + if mergedRoot.Kind != yaml.MappingNode { + return nil, fmt.Errorf("byte splice: merged root is not a mapping") + } + + // Build merged key→valueNode lookup, preserving source order for + // deterministic addition placement. + mergedByKey := make(map[string]*yaml.Node, len(mergedRoot.Content)/2) + mergedOrder := make([]string, 0, len(mergedRoot.Content)/2) + for i := 0; i+1 < len(mergedRoot.Content); i += 2 { + k := mergedRoot.Content[i].Value + mergedByKey[k] = mergedRoot.Content[i+1] + mergedOrder = append(mergedOrder, k) + } + + // Compute byte ranges for every top-level key in local. Reuses + // the existing topLevelKeyRanges helper that already handles the + // "blank/comment lines belong to the next key" attribution. + ranges := topLevelKeyRanges(local, localRoot) + if len(ranges) == 0 { + return nil, fmt.Errorf("byte splice: no top-level key ranges in local") + } + + // Walk local byte-by-byte. For each top-level key: + // - in-scope, in merged → emit serialized merged value + // - in-scope, not in merged → drop (deletion) + // - out-of-scope → copy verbatim + var out bytes.Buffer + cursor := 0 + consumed := make(map[string]bool, len(mergedByKey)) + + for _, r := range ranges { + // Out-of-scope keys: copy from cursor to end of this range + // verbatim (this preserves both the key itself AND any + // preceding bytes between the previous key's end and this + // range's start, which is where leading comments/blanks for + // out-of-scope keys live). + if !scope[r.key] { + out.Write(local[cursor:r.endByte]) + cursor = r.endByte + continue + } + + // In-scope key. First copy any bytes between cursor and the + // start of this range (preserves any blank lines / comments + // that the boundary attribution placed BEFORE this key but + // AFTER the previous one). Then emit the substituted block. + if r.startByte > cursor { + out.Write(local[cursor:r.startByte]) + } + + mergedVal, ok := mergedByKey[r.key] + if !ok { + // Deletion: skip the range entirely. Don't emit + // anything; the cursor advance below skips local's + // version too. + cursor = r.endByte + continue + } + + // Substitute: serialize the merged key:value pair as YAML + // and emit it. We use a small synthetic mapping with just + // this one key so the indentation comes out as a top-level + // entry. Re-emit produces clean YAML for this slice but + // leaves all other bytes untouched. + serialized, serErr := marshalSingleTopLevelKey(r.key, mergedVal) + if serErr != nil { + return nil, fmt.Errorf("byte splice: marshal %q: %w", r.key, serErr) + } + out.Write(serialized) + consumed[r.key] = true + cursor = r.endByte + } + + // Tail bytes after the last top-level key (trailing whitespace, + // trailing comments). + if cursor < len(local) { + out.Write(local[cursor:]) + } + + // Additions: any merged keys that weren't already in local. Append + // them at the end in merged source order. Ensure a separating + // newline first so we don't fuse with the previous trailing line. + additions := make([]string, 0, len(mergedOrder)) + for _, k := range mergedOrder { + if !consumed[k] && scope[k] { + additions = append(additions, k) + } + } + if len(additions) > 0 { + buf := out.Bytes() + if len(buf) > 0 && buf[len(buf)-1] != '\n' { + out.WriteByte('\n') + } + for _, k := range additions { + serialized, serErr := marshalSingleTopLevelKey(k, mergedByKey[k]) + if serErr != nil { + return nil, fmt.Errorf("byte splice: marshal addition %q: %w", k, serErr) + } + out.Write(serialized) + } + } + + return out.Bytes(), nil +} + +// marshalSingleTopLevelKey serializes one key:value pair as a +// top-level YAML mapping entry. The output starts with `key:` at +// column 0 and includes a trailing newline. Used by the byte-level +// splice to emit only the in-scope keys without re-emitting the +// surrounding document. +func marshalSingleTopLevelKey(key string, value *yaml.Node) ([]byte, error) { + doc := &yaml.Node{ + Kind: yaml.DocumentNode, + Content: []*yaml.Node{{ + Kind: yaml.MappingNode, + Tag: "!!map", + Content: []*yaml.Node{ + {Kind: yaml.ScalarNode, Tag: "!!str", Value: key}, + value, + }, + }}, + } + var buf bytes.Buffer + enc := yaml.NewEncoder(&buf) + enc.SetIndent(2) + if err := enc.Encode(doc); err != nil { + return nil, err + } + if err := enc.Close(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + // spliceYAMLStructural is the fast path: parse both sides, rewrite the // local tree in place, re-emit. func spliceYAMLStructural(local, merged []byte, scope map[string]bool) ([]byte, error) { diff --git a/pkg/env/splice_test.go b/pkg/env/splice_test.go index 4a510c2..7b6ae77 100644 --- a/pkg/env/splice_test.go +++ b/pkg/env/splice_test.go @@ -87,6 +87,68 @@ envs: } } +// TestSpliceYAMLByteLevel_PreservesOutOfScopeBytesVerbatim verifies +// that the format-preserving byte-level splice keeps out-of-scope +// content byte-identical to the input. The previous structural +// splice round-tripped the whole document through yaml.Marshal, +// which would normalize whitespace, quoting, and field ordering +// even for keys the splice didn't touch — producing noisy git +// diffs on every successful merge sync. +func TestSpliceYAMLByteLevel_PreservesOutOfScopeBytesVerbatim(t *testing.T) { + // Local file uses non-default formatting choices (4-space + // indent, double-quoted values, trailing comments) that the + // yaml.v3 emitter would normalize away. + local := []byte(`# Top of file +binaries: + kubectl: {} + kustomize: {} + +envs: + "github.com/keep/me": # an inline comment + files: + "a.yaml": "docs/a.yaml" + # trailing comment inside envs +`) + merged := []byte(`binaries: + kubectl: {} + kustomize: {} + tilt: {} +`) + out, err := spliceSelectedScope(local, merged, []string{"binaries"}, "b.yaml") + if err != nil { + t.Fatalf("splice: %v", err) + } + outStr := string(out) + + // 1) merged binaries content present. + if !strings.Contains(outStr, "tilt") { + t.Errorf("merged tilt missing, got:\n%s", outStr) + } + + // 2) Out-of-scope envs section preserved BYTE-FOR-BYTE. Each of + // these would normally be reformatted by yaml.v3: + // - 4-space indent → 2-space indent + // - double-quoted keys → unquoted + // - inline comment dropped + // - "trailing comment inside envs" dropped + expectations := []string{ + `"github.com/keep/me": # an inline comment`, + ` files:`, + ` "a.yaml": "docs/a.yaml"`, + ` # trailing comment inside envs`, + } + for _, exp := range expectations { + if !strings.Contains(outStr, exp) { + t.Errorf("byte-preservation lost line %q in output:\n%s", exp, outStr) + } + } + + // 3) Top-of-file comment preserved. + if !strings.Contains(outStr, "# Top of file") { + t.Errorf("header comment lost: %s", outStr) + } +} + // TestSpliceYAMLStructural_NonContiguousScopedKeys verifies that the // structural splice handles two scoped keys separated by an out-of-scope // key in the local file, without reordering.. From fd6420e9bd23b4d41592e2b61d7c0f805d44c6cb Mon Sep 17 00:00:00 2001 From: Jan Guth Date: Tue, 7 Apr 2026 18:33:40 +0200 Subject: [PATCH 02/10] fix(env): address copilot review on #133 - spliceYAML doc comment now accurately describes the byte-level path: out-of-scope bytes are byte-identical, but in-scope ranges are re-encoded as full key:value pairs and may have key quoting/style and key-line comments rewritten. - TestSpliceYAMLByteLevel_PreservesOutOfScopeBytesVerbatim now asserts exact byte equality on the envs: slice instead of substring matching, so any reformatter touching that region fails the test even when the surrounding lines still appear. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/env/splice.go | 16 +++++++++------- pkg/env/splice_test.go | 35 +++++++++++++++++++---------------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/pkg/env/splice.go b/pkg/env/splice.go index cb422b7..a962b2d 100644 --- a/pkg/env/splice.go +++ b/pkg/env/splice.go @@ -115,13 +115,15 @@ func containsConflictMarkers(b []byte) bool { // fidelity: // // 1. Byte-level splice (best fidelity): when `merged` is valid YAML -// AND the local file parses cleanly into a top-level mapping, copy -// out-of-scope byte ranges from `local` verbatim and only re-emit -// the in-scope keys' values via yaml.Marshal. This preserves -// whitespace, quoting style, and comment formatting on every -// unchanged byte. Out-of-scope content is byte-identical to the -// input — no spurious git-diff churn from yaml.v3 emitter -// normalization. +// AND the local file parses cleanly into a top-level mapping, +// copy out-of-scope byte ranges from `local` verbatim and re-emit +// each in-scope top-level `key: value` pair via yaml.Marshal. +// Bytes outside the scoped ranges are preserved exactly, but +// formatting WITHIN those ranges can change because yaml.v3 +// re-encodes the whole entry — including key quoting/style and +// any key-line comments. Out-of-scope content is byte-identical +// to the input — no spurious git-diff churn from yaml.v3 emitter +// normalization outside the replaced ranges. // // 2. Structural splice (fallback for YAML quirks): when the byte-level // path can't be used (rare — e.g. an unrecognised local-file diff --git a/pkg/env/splice_test.go b/pkg/env/splice_test.go index 7b6ae77..c65199f 100644 --- a/pkg/env/splice_test.go +++ b/pkg/env/splice_test.go @@ -1,6 +1,7 @@ package env import ( + "bytes" "strings" "testing" ) @@ -125,22 +126,24 @@ envs: t.Errorf("merged tilt missing, got:\n%s", outStr) } - // 2) Out-of-scope envs section preserved BYTE-FOR-BYTE. Each of - // these would normally be reformatted by yaml.v3: - // - 4-space indent → 2-space indent - // - double-quoted keys → unquoted - // - inline comment dropped - // - "trailing comment inside envs" dropped - expectations := []string{ - `"github.com/keep/me": # an inline comment`, - ` files:`, - ` "a.yaml": "docs/a.yaml"`, - ` # trailing comment inside envs`, - } - for _, exp := range expectations { - if !strings.Contains(outStr, exp) { - t.Errorf("byte-preservation lost line %q in output:\n%s", exp, outStr) - } + // 2) Out-of-scope envs section preserved BYTE-FOR-BYTE. The + // check is intentionally an exact slice comparison: we cut the + // `envs:` block out of both the original local input and the + // spliced output and compare them directly. A reformatter that + // changed any whitespace, quoting, or comment placement inside + // this region would fail the comparison even if the surrounding + // lines still matched. The local input was crafted to use + // 4-space indent, double-quoted keys, an inline comment, and + // a trailing comment — every one of which the yaml.v3 emitter + // would normally rewrite. + localEnvsStart := bytes.Index(local, []byte("envs:")) + outEnvsStart := bytes.Index(out, []byte("envs:")) + if localEnvsStart < 0 || outEnvsStart < 0 { + t.Fatalf("envs: marker missing — local=%d out=%d\n%s", localEnvsStart, outEnvsStart, outStr) + } + if !bytes.Equal(local[localEnvsStart:], out[outEnvsStart:]) { + t.Errorf("envs section not byte-identical:\nwant:\n%q\ngot:\n%q", + local[localEnvsStart:], out[outEnvsStart:]) } // 3) Top-of-file comment preserved. From 498e20d94bd6aa89d2b932c7ed5cb35aee84e235 Mon Sep 17 00:00:00 2001 From: Jan Guth Date: Tue, 7 Apr 2026 18:47:03 +0200 Subject: [PATCH 03/10] fix(env): address copilot review on #133 round 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Validate top-level key byte ranges before slicing in spliceYAMLByteLevel. yaml.v3 Line/Column metadata can be unreliable for flow-style mappings or single-line documents, and an out-of-bounds slice would panic at runtime instead of letting spliceYAML fall back to the structural / text path. - Preserve CRLF line endings when the local file uses them. yaml.v3 always emits LF, so without this the spliced regions would produce a mixed-ending file (CRLF in preserved bytes, LF in spliced regions) — noisy in diffs and bad for Windows-aware tooling. New usesCRLF helper detects the local file's convention and the byte-level splice rewrites emitted blocks (and the appended-keys separator newline) to match. - Strategy doc comment now refers to marshalSingleTopLevelKey instead of yaml.Marshal, which is what the implementation actually uses. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/env/splice.go | 52 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/pkg/env/splice.go b/pkg/env/splice.go index a962b2d..9975a6a 100644 --- a/pkg/env/splice.go +++ b/pkg/env/splice.go @@ -101,6 +101,14 @@ func topLevelKeysFromSelectors(selectors []string) map[string]bool { return keys } +// usesCRLF reports whether the file appears to use Windows-style CRLF +// line endings. We check for `\r\n` rather than just `\n` so a stray +// `\n` doesn't fool us. The byte-level splice uses this to keep +// emitted regions consistent with the local file's line endings. +func usesCRLF(b []byte) bool { + return bytes.Contains(b, []byte("\r\n")) +} + // containsConflictMarkers checks if a byte slice contains git merge-file // conflict markers (`<<<<<<<`, `=======`, `>>>>>>>`). We detect the full // set; a partial match could be legitimate content (e.g. a markdown @@ -117,7 +125,9 @@ func containsConflictMarkers(b []byte) bool { // 1. Byte-level splice (best fidelity): when `merged` is valid YAML // AND the local file parses cleanly into a top-level mapping, // copy out-of-scope byte ranges from `local` verbatim and re-emit -// each in-scope top-level `key: value` pair via yaml.Marshal. +// each in-scope top-level `key: value` pair via the +// marshalSingleTopLevelKey helper (which builds a one-key +// synthetic document and runs it through yaml.v3's encoder). // Bytes outside the scoped ranges are preserved exactly, but // formatting WITHIN those ranges can change because yaml.v3 // re-encodes the whole entry — including key quoting/style and @@ -229,6 +239,29 @@ func spliceYAMLByteLevel(local, merged []byte, scope map[string]bool) ([]byte, e if len(ranges) == 0 { return nil, fmt.Errorf("byte splice: no top-level key ranges in local") } + // Validate that the byte ranges are sane before we start + // slicing. yaml.v3 Line/Column metadata is occasionally missing + // or stale (flow-style mappings, multiple keys on the same + // line, single-line documents). Without this check a slice like + // local[cursor:r.endByte] can panic at runtime instead of + // returning an error and letting spliceYAML fall back to the + // structural / text path. + prevEnd := 0 + for i, r := range ranges { + if r.startByte < 0 || r.endByte < 0 || + r.startByte > len(local) || r.endByte > len(local) || + r.startByte > r.endByte || + r.startByte < prevEnd { + return nil, fmt.Errorf("byte splice: invalid range for key %q (start=%d end=%d prevEnd=%d len=%d)", + r.key, r.startByte, r.endByte, prevEnd, len(local)) + } + // At least one of the ranges must make forward progress, or + // the splice loop would emit nothing useful. + if i == 0 && r.endByte == 0 { + return nil, fmt.Errorf("byte splice: zero-length first range for key %q", r.key) + } + prevEnd = r.endByte + } // Walk local byte-by-byte. For each top-level key: // - in-scope, in merged → emit serialized merged value @@ -276,6 +309,13 @@ func spliceYAMLByteLevel(local, merged []byte, scope map[string]bool) ([]byte, e if serErr != nil { return nil, fmt.Errorf("byte splice: marshal %q: %w", r.key, serErr) } + // yaml.v3 always emits LF line endings. If local uses CRLF + // we'd otherwise produce a mixed-ending file (CRLF in the + // preserved bytes, LF in the spliced regions), which is + // noisy in diffs and trips Windows-aware tooling. + if usesCRLF(local) { + serialized = bytes.ReplaceAll(serialized, []byte("\n"), []byte("\r\n")) + } out.Write(serialized) consumed[r.key] = true cursor = r.endByte @@ -297,15 +337,23 @@ func spliceYAMLByteLevel(local, merged []byte, scope map[string]bool) ([]byte, e } } if len(additions) > 0 { + crlf := usesCRLF(local) buf := out.Bytes() if len(buf) > 0 && buf[len(buf)-1] != '\n' { - out.WriteByte('\n') + if crlf { + out.WriteString("\r\n") + } else { + out.WriteByte('\n') + } } for _, k := range additions { serialized, serErr := marshalSingleTopLevelKey(k, mergedByKey[k]) if serErr != nil { return nil, fmt.Errorf("byte splice: marshal addition %q: %w", k, serErr) } + if crlf { + serialized = bytes.ReplaceAll(serialized, []byte("\n"), []byte("\r\n")) + } out.Write(serialized) } } From e7d9c81615b9c899f3f899bce6664d921afbee74 Mon Sep 17 00:00:00 2001 From: Jan Guth Date: Tue, 7 Apr 2026 18:59:26 +0200 Subject: [PATCH 04/10] fix(env): tighten range validation, hoist usesCRLF, add CRLF test (#133 round 3) - Range validation now requires startByte < endByte (was allowing startByte == endByte, which let zero-length yaml.v3 metadata for flow-style mappings sneak through and emit garbage). - Hoist the usesCRLF call out of the per-key splice loop. Scanning the entire file inside the loop made the splice O(n*k); the line-ending convention can't change between keys, so detecting it once is enough. - New TestSpliceYAMLByteLevel_PreservesCRLFLineEndings test covers the CRLF normalization path end-to-end: a CRLF local file stays CRLF after the splice with no bare LFs anywhere in the output. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/env/splice.go | 22 +++++++++++++--------- pkg/env/splice_test.go | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/pkg/env/splice.go b/pkg/env/splice.go index 9975a6a..07bea03 100644 --- a/pkg/env/splice.go +++ b/pkg/env/splice.go @@ -246,23 +246,28 @@ func spliceYAMLByteLevel(local, merged []byte, scope map[string]bool) ([]byte, e // local[cursor:r.endByte] can panic at runtime instead of // returning an error and letting spliceYAML fall back to the // structural / text path. + // Every range must be non-empty and make forward progress. + // startByte == endByte was previously allowed, which let a + // flow-style mapping with zero-length yaml.v3 metadata sneak + // through and emit garbage. We require startByte < endByte for + // every range so the splice loop is guaranteed to advance. prevEnd := 0 - for i, r := range ranges { + for _, r := range ranges { if r.startByte < 0 || r.endByte < 0 || r.startByte > len(local) || r.endByte > len(local) || - r.startByte > r.endByte || + r.startByte >= r.endByte || r.startByte < prevEnd { return nil, fmt.Errorf("byte splice: invalid range for key %q (start=%d end=%d prevEnd=%d len=%d)", r.key, r.startByte, r.endByte, prevEnd, len(local)) } - // At least one of the ranges must make forward progress, or - // the splice loop would emit nothing useful. - if i == 0 && r.endByte == 0 { - return nil, fmt.Errorf("byte splice: zero-length first range for key %q", r.key) - } prevEnd = r.endByte } + // Detect line-ending convention once. usesCRLF scans the whole + // file, so calling it inside the per-key loop turns the splice + // into O(n*k); hoist it here so the cost is paid exactly once. + crlf := usesCRLF(local) + // Walk local byte-by-byte. For each top-level key: // - in-scope, in merged → emit serialized merged value // - in-scope, not in merged → drop (deletion) @@ -313,7 +318,7 @@ func spliceYAMLByteLevel(local, merged []byte, scope map[string]bool) ([]byte, e // we'd otherwise produce a mixed-ending file (CRLF in the // preserved bytes, LF in the spliced regions), which is // noisy in diffs and trips Windows-aware tooling. - if usesCRLF(local) { + if crlf { serialized = bytes.ReplaceAll(serialized, []byte("\n"), []byte("\r\n")) } out.Write(serialized) @@ -337,7 +342,6 @@ func spliceYAMLByteLevel(local, merged []byte, scope map[string]bool) ([]byte, e } } if len(additions) > 0 { - crlf := usesCRLF(local) buf := out.Bytes() if len(buf) > 0 && buf[len(buf)-1] != '\n' { if crlf { diff --git a/pkg/env/splice_test.go b/pkg/env/splice_test.go index c65199f..b727f7f 100644 --- a/pkg/env/splice_test.go +++ b/pkg/env/splice_test.go @@ -88,6 +88,40 @@ envs: } } +// TestSpliceYAMLByteLevel_PreservesCRLFLineEndings verifies that a +// local file using Windows-style CRLF endings stays CRLF after the +// splice. yaml.v3 always emits LF, so without the CRLF normalization +// path the spliced regions would mix endings with the verbatim +// regions and produce noisy diffs on every sync from a Windows +// consumer. +func TestSpliceYAMLByteLevel_PreservesCRLFLineEndings(t *testing.T) { + // Build a local file with explicit CRLF separators. + local := []byte("binaries:\r\n kubectl: {}\r\n\r\nenvs:\r\n github.com/x/y: {}\r\n") + merged := []byte("binaries:\n kubectl: {}\n helm: {}\n") + out, err := spliceSelectedScope(local, merged, []string{"binaries"}, "b.yaml") + if err != nil { + t.Fatalf("splice: %v", err) + } + // Every newline in the output should be CRLF — no bare LF. + if bytes.Contains(out, []byte("\n")) && !bytes.Contains(out, []byte("\r\n")) { + t.Errorf("expected CRLF endings, got bare LF:\n%q", out) + } + for i := 0; i < len(out); i++ { + if out[i] == '\n' { + if i == 0 || out[i-1] != '\r' { + t.Errorf("bare LF at byte %d in CRLF file:\n%q", i, out) + break + } + } + } + // Out-of-scope envs section is byte-identical including its CRLFs. + envsStart := bytes.Index(local, []byte("envs:")) + outEnvsStart := bytes.Index(out, []byte("envs:")) + if !bytes.Equal(local[envsStart:], out[outEnvsStart:]) { + t.Errorf("envs section not byte-identical under CRLF") + } +} + // TestSpliceYAMLByteLevel_PreservesOutOfScopeBytesVerbatim verifies // that the format-preserving byte-level splice keeps out-of-scope // content byte-identical to the input. The previous structural From 307e808d5f3248481b3e6be42a1b2844c97988f8 Mon Sep 17 00:00:00 2001 From: Jan Guth Date: Tue, 7 Apr 2026 21:02:19 +0200 Subject: [PATCH 05/10] fix(env): clarify byte-splice doc and guard envs slice (#133 round 4) - spliceYAMLByteLevel doc now says explicitly that the in-scope key line is also re-emitted (not preserved from local), so key quoting/style and key-line comments may change. Out-of-scope bytes still keep their original formatting. - TestSpliceYAMLByteLevel_PreservesCRLFLineEndings now checks the envs:-section bytes.Index results before slicing. A regression that dropped envs: from either side previously would have panicked instead of producing a useful failure. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/env/splice.go | 9 ++++++--- pkg/env/splice_test.go | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pkg/env/splice.go b/pkg/env/splice.go index 07bea03..a3b2106 100644 --- a/pkg/env/splice.go +++ b/pkg/env/splice.go @@ -182,9 +182,12 @@ func spliceYAML(local, merged []byte, selectors []string) ([]byte, error) { // spliceYAMLByteLevel is the format-preserving fast path. It walks // `local` byte-by-byte, copies out-of-scope ranges verbatim, and -// substitutes in-scope ranges with the marshaled value subtree from -// `merged`. The output preserves whitespace, quoting style, and -// comments on every byte that wasn't touched. +// substitutes each in-scope top-level key range with a marshaled +// `key: value` entry from `merged`. That means the scoped key line +// is also re-emitted, not preserved from `local`, so key quoting +// style and any comments attached to that line may change. Bytes +// outside the replaced top-level ranges keep their original +// whitespace, quoting style, and comments. // // Returns an error (instead of falling through to a slower path) when // the local YAML can't be parsed or has a non-mapping root. The diff --git a/pkg/env/splice_test.go b/pkg/env/splice_test.go index b727f7f..57509b1 100644 --- a/pkg/env/splice_test.go +++ b/pkg/env/splice_test.go @@ -116,7 +116,13 @@ func TestSpliceYAMLByteLevel_PreservesCRLFLineEndings(t *testing.T) { } // Out-of-scope envs section is byte-identical including its CRLFs. envsStart := bytes.Index(local, []byte("envs:")) + if envsStart == -1 { + t.Fatalf("test fixture missing envs section:\n%q", local) + } outEnvsStart := bytes.Index(out, []byte("envs:")) + if outEnvsStart == -1 { + t.Fatalf("spliced output missing envs section:\n%q", out) + } if !bytes.Equal(local[envsStart:], out[outEnvsStart:]) { t.Errorf("envs section not byte-identical under CRLF") } From 14a3aeeb0fb1636bdaaa7142a842e9b34f522f14 Mon Sep 17 00:00:00 2001 From: Jan Guth Date: Tue, 7 Apr 2026 21:14:49 +0200 Subject: [PATCH 06/10] fix(env): drop dead bare-LF check in CRLF preservation test (#133 round 5) The naive bytes.Contains(\\n) && !bytes.Contains(\\r\\n) check was always false on a CRLF file because CRLF itself contains \\n. The explicit per-byte loop below is the real check; remove the dead guard and document why. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/env/splice_test.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/env/splice_test.go b/pkg/env/splice_test.go index 57509b1..8bd1a05 100644 --- a/pkg/env/splice_test.go +++ b/pkg/env/splice_test.go @@ -102,10 +102,11 @@ func TestSpliceYAMLByteLevel_PreservesCRLFLineEndings(t *testing.T) { if err != nil { t.Fatalf("splice: %v", err) } - // Every newline in the output should be CRLF — no bare LF. - if bytes.Contains(out, []byte("\n")) && !bytes.Contains(out, []byte("\r\n")) { - t.Errorf("expected CRLF endings, got bare LF:\n%q", out) - } + // Every newline in the output should be CRLF — no bare LF. The + // loop below is the actual check (it walks each '\n' and + // requires a '\r' immediately before it). The naive + // bytes.Contains check that used to live here was a no-op + // because CRLF itself contains '\n'. for i := 0; i < len(out); i++ { if out[i] == '\n' { if i == 0 || out[i-1] != '\r' { From 86a1be57c69106a769a437a989a782816e3cdde1 Mon Sep 17 00:00:00 2001 From: Jan Guth Date: Tue, 7 Apr 2026 21:19:04 +0200 Subject: [PATCH 07/10] docs(env): refer to yaml.v3 encoder instead of yaml.Marshal (#133 round 6) The implementation uses yaml.NewEncoder().Encode() and the marshalSingleTopLevelKey helper, not yaml.Marshal directly. Update the splice strategy doc comments so the wording matches. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/env/splice.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/env/splice.go b/pkg/env/splice.go index a3b2106..5e80cc0 100644 --- a/pkg/env/splice.go +++ b/pkg/env/splice.go @@ -29,7 +29,7 @@ import ( // rewrite the local Node tree in place (replace scoped key values, // append new scoped keys, remove vanished scoped keys). Out-of-scope // comments and layout are preserved because their Nodes are untouched. -// Note: yaml.Marshal re-emits the whole document, so the output won't +// Note: the yaml.v3 encoder re-emits the whole document, so the output won't // be byte-identical to the input even for unchanged keys — this is a // limitation of yaml.v3 that the structural splice cannot work around // here. A format-preserving emitter is tracked as a separate @@ -138,7 +138,7 @@ func containsConflictMarkers(b []byte) bool { // 2. Structural splice (fallback for YAML quirks): when the byte-level // path can't be used (rare — e.g. an unrecognised local-file // shape), fall back to the Node-tree merge that round-trips the -// whole document through yaml.Marshal. Out-of-scope keys keep +// whole document through the yaml.v3 encoder. Out-of-scope keys keep // their content but lose exact whitespace. // // 3. Text splice (conflict path): when `merged` contains @@ -150,7 +150,7 @@ func containsConflictMarkers(b []byte) bool { // // (1) was added as the format-preserving emitter follow-up to PR #126. // Before it, every successful merge sync produced a noisy git diff -// because yaml.Marshal would re-emit the entire document with its own +// because the yaml.v3 encoder would re-emit the entire document with its own // preferred whitespace and quoting style — even for keys the splice // didn't touch. func spliceYAML(local, merged []byte, selectors []string) ([]byte, error) { @@ -165,7 +165,7 @@ func spliceYAML(local, merged []byte, selectors []string) ([]byte, error) { return out, nil } // Path 2: structural splice. Same parseability requirements - // but rebuilds the whole document via yaml.Marshal. Used when + // but rebuilds the whole document via the yaml.v3 encoder. Used when // the byte-level splice can't handle the local shape (e.g. // flow-style mapping where Line metadata isn't reliable). out, err = spliceYAMLStructural(local, merged, scope) From ec52a858efd13e2bfb525df7f87dec3ae643754c Mon Sep 17 00:00:00 2001 From: Jan Guth Date: Tue, 7 Apr 2026 21:46:01 +0200 Subject: [PATCH 08/10] test(env): cover byte-level splice additions path; fix stale comment (#133 round 7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New TestSpliceYAMLByteLevel_AppendsNewScopedKey covers the "additions" branch: local has only out-of-scope keys, merged introduces a new scoped top-level key, and the splice must append it at EOF with byte-identical preservation of the existing local content and a separating newline. - Update the stale comment in TestSpliceYAMLByteLevel_PreservesOutOfScopeBytesVerbatim that still mentioned yaml.Marshal — the implementation uses the yaml.v3 encoder. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/env/splice_test.go | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/pkg/env/splice_test.go b/pkg/env/splice_test.go index 8bd1a05..67734ee 100644 --- a/pkg/env/splice_test.go +++ b/pkg/env/splice_test.go @@ -129,10 +129,45 @@ func TestSpliceYAMLByteLevel_PreservesCRLFLineEndings(t *testing.T) { } } +// TestSpliceYAMLByteLevel_AppendsNewScopedKey verifies the byte-level +// splice's "additions" path: when a scoped top-level key exists in +// merged but NOT in local, it must be appended at EOF without +// disturbing the existing bytes and with a separating newline. +func TestSpliceYAMLByteLevel_AppendsNewScopedKey(t *testing.T) { + // Local has only out-of-scope keys (envs), no binaries. + local := []byte(`envs: + github.com/keep/me: + files: + a.yaml: docs/a.yaml +`) + // Merged introduces binaries. The splice should append it at EOF. + merged := []byte(`binaries: + kubectl: {} +`) + out, err := spliceSelectedScope(local, merged, []string{"binaries"}, "b.yaml") + if err != nil { + t.Fatalf("splice: %v", err) + } + outStr := string(out) + // The original envs section must come first and be byte-identical. + if !bytes.HasPrefix(out, local) { + t.Errorf("local prefix not preserved verbatim:\nlocal: %q\nout: %q", local, outStr) + } + // The merged binaries content must follow. + if !strings.Contains(outStr, "binaries:") || !strings.Contains(outStr, "kubectl") { + t.Errorf("appended binaries missing:\n%s", outStr) + } + // The append must be separated by a newline (no fused last line). + binariesIdx := bytes.Index(out, []byte("binaries:")) + if binariesIdx <= 0 || out[binariesIdx-1] != '\n' { + t.Errorf("appended block not separated by newline at %d:\n%q", binariesIdx, outStr) + } +} + // TestSpliceYAMLByteLevel_PreservesOutOfScopeBytesVerbatim verifies // that the format-preserving byte-level splice keeps out-of-scope // content byte-identical to the input. The previous structural -// splice round-tripped the whole document through yaml.Marshal, +// splice re-encoded the whole document with the yaml.v3 encoder, // which would normalize whitespace, quoting, and field ordering // even for keys the splice didn't touch — producing noisy git // diffs on every successful merge sync. From 6ba42e59e16e04411b18f3da03fb60f3e525d952 Mon Sep 17 00:00:00 2001 From: Jan Guth Date: Tue, 7 Apr 2026 21:56:00 +0200 Subject: [PATCH 09/10] docs(env): update spliceSelectedScope strategy comment to 3-path (#133 round 8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The doc still described only the structural fast path + text fallback and called the format-preserving emitter a "separate follow-up". This PR added the byte-level fast path; rewrite the top-level comment to describe the three-path dispatch in order of fidelity (byte-level → structural → text) and drop the stale follow-up wording. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/env/splice.go | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/pkg/env/splice.go b/pkg/env/splice.go index 5e80cc0..f3ae2cd 100644 --- a/pkg/env/splice.go +++ b/pkg/env/splice.go @@ -13,36 +13,32 @@ import ( // result of the selected scope (from doMerge), and the list of selectors, // and returns a new file where only the selected top-level keys in `local` // are replaced with the merged values. Out-of-scope top-level keys in -// `local` are preserved; YAML comments and key ordering are preserved on -// a best-effort basis. The structural fast path round-trips through -// yaml.v3's emitter, which can normalize whitespace and quoting style -// even for unchanged keys, so the output is not guaranteed to be -// byte-identical to the input. The text fallback path (used when the -// merge produced conflict markers) preserves bytes verbatim. +// `local` are preserved. // // This is the complement of filterContent: filterContent extracts a scope; -// spliceSelectedScope puts a (merged) scope back. +// spliceSelectedScope puts a (merged) scope back. The implementation +// dispatches between three paths in order of fidelity (see spliceYAML): // -// Two paths: +// - Byte-level splice (best fidelity): when `merged` parses cleanly, +// copy out-of-scope byte ranges from `local` verbatim and re-emit +// each in-scope `key: value` pair via the yaml.v3 encoder. Bytes +// outside the replaced ranges are byte-identical to local; the +// scoped key lines are re-encoded so quoting/style and key-line +// comments inside those ranges may change. // -// - Structural splice (fast path): when `merged` parses as valid YAML, -// rewrite the local Node tree in place (replace scoped key values, -// append new scoped keys, remove vanished scoped keys). Out-of-scope -// comments and layout are preserved because their Nodes are untouched. -// Note: the yaml.v3 encoder re-emits the whole document, so the output won't -// be byte-identical to the input even for unchanged keys — this is a -// limitation of yaml.v3 that the structural splice cannot work around -// here. A format-preserving emitter is tracked as a separate -// follow-up. +// - Structural splice (fallback): when the byte-level path can't be +// used (rare yaml.v3 edge cases — flow-style mappings, single-line +// docs), round-trip the whole document through yaml.v3's Node tree. +// Out-of-scope keys keep their content but lose exact whitespace +// because the encoder re-emits the entire file. // -// - Text splice (conflict path): when `merged` contains `git merge-file` -// conflict markers, it doesn't parse as YAML. In that case we find the -// byte range of each scoped top-level key in `local` using yaml.v3 -// Node Line/Column metadata, and replace that range with the -// conflicted text from `merged`. The rest of `local` is preserved -// byte-for-byte, so consumer-owned content and comments survive even -// in the conflict case — which is the #122 data-loss fix the user -// actually cares about. +// - Text splice (conflict path): when `merged` contains git merge-file +// conflict markers it doesn't parse as YAML at all. We then find +// the byte range of each scoped top-level key in `local` and +// substitute the conflicted text in place. Bytes outside the +// scoped ranges are preserved verbatim, so consumer-owned content +// and comments survive even in the conflict case — the #122 +// data-loss fix. // // Splicing is only correct when `merged` contains the COMPLETE value // for each in-scope top-level key. Nested selectors like `database.host` From 4ee252b386072d05c189f9863c52d1662871b8ec Mon Sep 17 00:00:00 2001 From: Jan Guth Date: Tue, 7 Apr 2026 22:11:59 +0200 Subject: [PATCH 10/10] fix(env): tighten usesCRLF to require all-CRLF (#133 round 9) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously usesCRLF returned true if the file contained ANY \r\n, which meant a mostly-LF file with a stray CRLF would have spliced regions rewritten to CRLF — increasing the mixing instead of fixing it. New rule: a file is CRLF only when at least one \r\n is present AND every \n is preceded by \r. Mixed-ending files stay LF so the splice produces output that matches yaml.v3's emitter and the dominant convention. New TestUsesCRLF covers all five edge cases. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/env/splice.go | 27 ++++++++++++++++++++++----- pkg/env/splice_test.go | 26 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/pkg/env/splice.go b/pkg/env/splice.go index f3ae2cd..4952e16 100644 --- a/pkg/env/splice.go +++ b/pkg/env/splice.go @@ -97,12 +97,29 @@ func topLevelKeysFromSelectors(selectors []string) map[string]bool { return keys } -// usesCRLF reports whether the file appears to use Windows-style CRLF -// line endings. We check for `\r\n` rather than just `\n` so a stray -// `\n` doesn't fool us. The byte-level splice uses this to keep -// emitted regions consistent with the local file's line endings. +// usesCRLF reports whether the file should be treated as having +// Windows-style CRLF line endings. We require: +// - at least one `\r\n` (so empty / single-line files don't +// accidentally trigger the rewrite path) +// - every `\n` to be preceded by `\r` (mixed-ending files keep +// LF so the splice doesn't make the mixing worse — yaml.v3 +// emits LF and that matches the dominant convention) +// +// The byte-level splice uses this to keep emitted regions +// consistent with the local file's line endings. func usesCRLF(b []byte) bool { - return bytes.Contains(b, []byte("\r\n")) + sawCRLF := false + for i := 0; i < len(b); i++ { + if b[i] != '\n' { + continue + } + if i == 0 || b[i-1] != '\r' { + // Found a bare LF → mixed or LF-only file. + return false + } + sawCRLF = true + } + return sawCRLF } // containsConflictMarkers checks if a byte slice contains git merge-file diff --git a/pkg/env/splice_test.go b/pkg/env/splice_test.go index 67734ee..1e1f4dc 100644 --- a/pkg/env/splice_test.go +++ b/pkg/env/splice_test.go @@ -88,6 +88,32 @@ envs: } } +// TestUsesCRLF covers the strict CRLF detector. A file is only CRLF +// when at least one \r\n is present AND every \n is preceded by \r; +// mixed-ending files (mostly LF with a stray CRLF) stay LF so the +// splice doesn't make the mixing worse. +func TestUsesCRLF(t *testing.T) { + cases := []struct { + name string + in string + want bool + }{ + {"empty", "", false}, + {"lf only", "a\nb\n", false}, + {"crlf only", "a\r\nb\r\n", true}, + {"mostly lf with stray crlf", "a\nb\r\nc\n", false}, + {"mostly crlf with stray lf", "a\r\nb\nc\r\n", false}, + {"single line no newline", "abc", false}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + if got := usesCRLF([]byte(c.in)); got != c.want { + t.Errorf("usesCRLF(%q) = %v, want %v", c.in, got, c.want) + } + }) + } +} + // TestSpliceYAMLByteLevel_PreservesCRLFLineEndings verifies that a // local file using Windows-style CRLF endings stays CRLF after the // splice. yaml.v3 always emits LF, so without the CRLF normalization