diff --git a/src/patch_set/mod.rs b/src/patch_set/mod.rs index 7e15a6e..67ac1f6 100644 --- a/src/patch_set/mod.rs +++ b/src/patch_set/mod.rs @@ -9,7 +9,9 @@ mod parse; mod tests; use std::borrow::Cow; +use std::fmt; +use crate::utils::Text; use crate::Patch; pub use error::PatchSetParseError; @@ -116,7 +118,7 @@ impl<'a, T: ToOwned + ?Sized> PatchKind<'a, T> { /// (create, delete, modify, or rename). #[derive(Clone, PartialEq, Eq)] pub struct FilePatch<'a, T: ToOwned + ?Sized> { - operation: FileOperation<'a>, + operation: FileOperation<'a, T>, kind: PatchKind<'a, T>, old_mode: Option, new_mode: Option, @@ -139,7 +141,7 @@ where impl<'a, T: ToOwned + ?Sized> FilePatch<'a, T> { fn new( - operation: FileOperation<'a>, + operation: FileOperation<'a, T>, patch: Patch<'a, T>, old_mode: Option, new_mode: Option, @@ -153,7 +155,7 @@ impl<'a, T: ToOwned + ?Sized> FilePatch<'a, T> { } /// Returns the file operation for this patch. - pub fn operation(&self) -> &FileOperation<'a> { + pub fn operation(&self) -> &FileOperation<'a, T> { &self.operation } @@ -192,12 +194,12 @@ impl<'a, T: ToOwned + ?Sized> FilePatch<'a, T> { /// /// This is determined by examining the `---` and `+++` header lines /// of a unified diff patch, and git extended headers when available. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum FileOperation<'a> { +#[derive(PartialEq, Eq)] +pub enum FileOperation<'a, T: ToOwned + ?Sized> { /// Delete a file (`+++ /dev/null`). - Delete(Cow<'a, str>), + Delete(Cow<'a, T>), /// Create a new file (`--- /dev/null`). - Create(Cow<'a, str>), + Create(Cow<'a, T>), /// Modify a file. /// /// * If `original == modified`, this is an in-place modification. @@ -205,35 +207,78 @@ pub enum FileOperation<'a> { /// /// Usually, the caller needs to strip the prefix from the paths to determine. Modify { - original: Cow<'a, str>, - modified: Cow<'a, str>, + original: Cow<'a, T>, + modified: Cow<'a, T>, }, /// Rename a file (move from `from` to `to`, delete `from`). /// /// Only produced when git extended headers explicitly indicate a rename. - Rename { - from: Cow<'a, str>, - to: Cow<'a, str>, - }, + Rename { from: Cow<'a, T>, to: Cow<'a, T> }, /// Copy a file (copy from `from` to `to`, keep `from`). /// /// Only produced when git extended headers explicitly indicate a copy. - Copy { - from: Cow<'a, str>, - to: Cow<'a, str>, - }, + Copy { from: Cow<'a, T>, to: Cow<'a, T> }, +} + +impl Clone for FileOperation<'_, T> { + fn clone(&self) -> Self { + match self { + Self::Delete(p) => Self::Delete(p.clone()), + Self::Create(p) => Self::Create(p.clone()), + Self::Modify { original, modified } => Self::Modify { + original: original.clone(), + modified: modified.clone(), + }, + Self::Rename { from, to } => Self::Rename { + from: from.clone(), + to: to.clone(), + }, + Self::Copy { from, to } => Self::Copy { + from: from.clone(), + to: to.clone(), + }, + } + } +} + +impl fmt::Debug for FileOperation<'_, T> +where + T: ToOwned + fmt::Debug, + O: std::borrow::Borrow + fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Delete(p) => f.debug_tuple("Delete").field(p).finish(), + Self::Create(p) => f.debug_tuple("Create").field(p).finish(), + Self::Modify { original, modified } => f + .debug_struct("Modify") + .field("original", original) + .field("modified", modified) + .finish(), + Self::Rename { from, to } => f + .debug_struct("Rename") + .field("from", from) + .field("to", to) + .finish(), + Self::Copy { from, to } => f + .debug_struct("Copy") + .field("from", from) + .field("to", to) + .finish(), + } + } } -impl FileOperation<'_> { +impl FileOperation<'_, T> { /// Strip the first `n` path components from the paths in this operation. /// /// This is similar to the `-p` option in GNU patch. For example, /// `strip_prefix(1)` on a path `a/src/lib.rs` would return `src/lib.rs`. - pub fn strip_prefix(&self, n: usize) -> FileOperation<'_> { - fn strip(path: &str, n: usize) -> &str { + pub fn strip_prefix(&self, n: usize) -> FileOperation<'_, T> { + fn strip(path: &T, n: usize) -> &T { let mut remaining = path; for _ in 0..n { - match remaining.split_once('/') { + match remaining.split_at_exclusive("/") { Some((_first, rest)) => remaining = rest, None => return remaining, } diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs index cec612b..e0ef4b0 100644 --- a/src/patch_set/parse.rs +++ b/src/patch_set/parse.rs @@ -5,6 +5,7 @@ use super::{ PatchSetParseError, }; use crate::patch::parse::parse_one; +use crate::utils::Text; use std::borrow::Cow; @@ -20,7 +21,7 @@ const EMAIL_PREAMBLE_SEPARATOR: &str = "\n---\n"; /// Streaming iterator for parsing patches one at a time. /// -/// Created by [`PatchSet::parse`]. +/// Created by [`PatchSet::parse`] or [`PatchSet::parse_bytes`]. /// /// # Example /// @@ -45,16 +46,16 @@ const EMAIL_PREAMBLE_SEPARATOR: &str = "\n---\n"; /// println!("{:?}", patch.operation()); /// } /// ``` -pub struct PatchSet<'a> { - input: &'a str, +pub struct PatchSet<'a, T: ?Sized> { + input: &'a T, offset: usize, opts: ParseOptions, finished: bool, found_any: bool, } -impl<'a> PatchSet<'a> { - /// Creates a streaming parser for multiple file patches. +impl<'a> PatchSet<'a, str> { + /// Creates a streaming parser for multiple file patches from a string. pub fn parse(input: &'a str, opts: ParseOptions) -> Self { // Strip email preamble once at construction let input = strip_email_preamble(input); @@ -66,88 +67,117 @@ impl<'a> PatchSet<'a> { found_any: false, } } +} - /// Creates an error with the current offset as span. - fn error(&self, kind: PatchSetParseErrorKind) -> PatchSetParseError { - PatchSetParseError::new(kind, self.offset..self.offset) - } - - fn next_unidiff_patch(&mut self) -> Option, PatchSetParseError>> { - let remaining = &self.input[self.offset..]; - if remaining.is_empty() { - return None; +impl<'a> PatchSet<'a, [u8]> { + /// Creates a streaming parser for multiple file patches from raw bytes. + /// + /// This is useful when the diff output may contain non-UTF-8 content, + /// such as patches produced by `git diff --binary` on files that git + /// misdetects as text. + pub fn parse_bytes(input: &'a [u8], opts: ParseOptions) -> Self { + let input = strip_email_preamble(input); + Self { + input, + offset: 0, + opts, + finished: false, + found_any: false, } + } +} - let patch_start = find_patch_start(remaining)?; - self.found_any = true; - - let patch_input = &remaining[patch_start..]; - - let opts = crate::patch::parse::ParseOpts::default(); - let (result, consumed) = parse_one(patch_input, opts); - // Always advance so the iterator makes progress even on error. - let abs_patch_start = self.offset + patch_start; - self.offset += patch_start + consumed; - - let patch = match result { - Ok(patch) => patch, - Err(e) => return Some(Err(e.into())), - }; - let operation = match extract_file_op_unidiff(patch.original_path(), patch.modified_path()) - { - Ok(op) => op, - Err(mut e) => { - e.set_span(abs_patch_start..abs_patch_start); - return Some(Err(e)); - } - }; +impl<'a> Iterator for PatchSet<'a, str> { + type Item = Result, PatchSetParseError>; - Some(Ok(FilePatch::new(operation, patch, None, None))) + fn next(&mut self) -> Option { + next_patch(self) } } -impl<'a> Iterator for PatchSet<'a> { - type Item = Result, PatchSetParseError>; +impl<'a> Iterator for PatchSet<'a, [u8]> { + type Item = Result, PatchSetParseError>; fn next(&mut self) -> Option { - if self.finished { - return None; + next_patch(self) + } +} + +fn next_patch<'a, T: Text + ?Sized>( + ps: &mut PatchSet<'a, T>, +) -> Option, PatchSetParseError>> { + if ps.finished { + return None; + } + + let result = match ps.opts.format { + Format::UniDiff => next_unidiff_patch(ps), + }; + + if result.is_none() { + ps.finished = true; + if !ps.found_any { + let err = PatchSetParseError::new( + PatchSetParseErrorKind::NoPatchesFound, + ps.offset..ps.offset, + ); + return Some(Err(err)); } + } - let result = match self.opts.format { - Format::UniDiff => { - let result = self.next_unidiff_patch(); - if result.is_none() { - self.finished = true; - if !self.found_any { - return Some(Err(self.error(PatchSetParseErrorKind::NoPatchesFound))); - } - } - result - } - }; + result +} - result +fn next_unidiff_patch<'a, T: Text + ?Sized>( + ps: &mut PatchSet<'a, T>, +) -> Option, PatchSetParseError>> { + let remaining = remaining(ps); + if remaining.is_empty() { + return None; } + + let patch_start = find_patch_start(remaining)?; + ps.found_any = true; + + let (_, patch_input) = remaining.split_at(patch_start); + + let opts = crate::patch::parse::ParseOpts::default(); + let (result, consumed) = parse_one(patch_input, opts); + // Always advance so the iterator makes progress even on error. + let abs_patch_start = ps.offset + patch_start; + ps.offset += patch_start + consumed; + + let patch = match result { + Ok(patch) => patch, + Err(e) => return Some(Err(e.into())), + }; + let operation = match extract_file_op_unidiff(patch.original_path(), patch.modified_path()) { + Ok(op) => op, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + + Some(Ok(FilePatch::new(operation, patch, None, None))) +} + +fn remaining<'a, T: Text + ?Sized>(ps: &PatchSet<'a, T>) -> &'a T { + let (_, rest) = ps.input.split_at(ps.offset); + rest } /// Finds the byte offset of the first patch header in the input. /// /// A patch header starts with `--- ` or `+++ ` (the file path lines). /// Returns `None` if no header is found. -fn find_patch_start(input: &str) -> Option { +fn find_patch_start(input: &T) -> Option { let mut offset = 0; for line in input.lines() { if line.starts_with(ORIGINAL_PREFIX) || line.starts_with(MODIFIED_PREFIX) { return Some(offset); } offset += line.len(); - // Account for the line ending that `.lines()` strips - if input[offset..].starts_with("\r\n") { - offset += 2; - } else if input[offset..].starts_with('\n') { - offset += 1; - } } None } @@ -167,25 +197,30 @@ fn find_patch_start(input: &str) -> Option { /// > The log message and the patch are separated by a line with a three-dash line. /// /// [`git format-patch`]: https://git-scm.com/docs/git-format-patch -fn strip_email_preamble(input: &str) -> &str { +fn strip_email_preamble(input: &T) -> &T { // only strip preamble for mbox-formatted input if !input.starts_with("From ") { return input; } match input.find(EMAIL_PREAMBLE_SEPARATOR) { - Some(pos) => &input[pos + EMAIL_PREAMBLE_SEPARATOR.len()..], + Some(pos) => { + let (_, rest) = input.split_at(pos + EMAIL_PREAMBLE_SEPARATOR.len()); + rest + } None => input, } } /// Extracts the file operation from a patch based on its header paths. -pub(crate) fn extract_file_op_unidiff<'a>( - original: Option<&Cow<'a, str>>, - modified: Option<&Cow<'a, str>>, -) -> Result, PatchSetParseError> { - let is_create = original.map(Cow::as_ref) == Some(DEV_NULL); - let is_delete = modified.map(Cow::as_ref) == Some(DEV_NULL); +fn extract_file_op_unidiff<'a, T: Text + ?Sized>( + original: Option<&Cow<'a, T>>, + modified: Option<&Cow<'a, T>>, +) -> Result, PatchSetParseError> { + let is_dev_null = |cow: &Cow<'_, T>| cow.as_ref().as_bytes() == DEV_NULL.as_bytes(); + + let is_create = original.is_some_and(is_dev_null); + let is_delete = modified.is_some_and(is_dev_null); if is_create && is_delete { return Err(PatchSetParseErrorKind::BothDevNull.into()); diff --git a/src/patch_set/tests.rs b/src/patch_set/tests.rs index 5bbfd31..52f1c01 100644 --- a/src/patch_set/tests.rs +++ b/src/patch_set/tests.rs @@ -1,6 +1,6 @@ //! Tests for patchset parsing. -use super::{error::PatchSetParseErrorKind, FileOperation, ParseOptions, PatchSet}; +use super::{error::PatchSetParseErrorKind, FileOperation, ParseOptions, PatchKind, PatchSet}; mod file_operation { use super::*; @@ -463,3 +463,106 @@ In a hole in the ground there lived a hobbit ); } } + +mod patchset_unidiff_bytes { + use super::*; + use crate::patch::Line; + + #[test] + fn single_file_bytes() { + let content = b"\ +--- a/file.rs ++++ b/file.rs +@@ -1 +1 @@ +-old ++new +"; + let patches = PatchSet::parse_bytes(content.as_slice(), ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_modify()); + } + + #[test] + fn non_utf8_hunk_content() { + // Simulate a patch where hunk content has non-UTF-8 bytes. + // This is the primary use case for parse_bytes: git may produce + // text-format hunks for files it misdetects as text (e.g. small + // PNGs without NUL bytes). + let mut content = Vec::new(); + content.extend_from_slice(b"--- a/icon.png\n"); + content.extend_from_slice(b"+++ b/icon.png\n"); + content.extend_from_slice(b"@@ -1 +1 @@\n"); + content.extend_from_slice(b"-old\x89PNG\n"); + content.extend_from_slice(b"+new\x89PNG\n"); + + let patches = PatchSet::parse_bytes(&content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + + let PatchKind::Text(patch) = patches[0].patch(); + let lines = patch.hunks()[0].lines(); + assert_eq!(lines[0], Line::Delete(b"old\x89PNG\n".as_slice())); + assert_eq!(lines[1], Line::Insert(b"new\x89PNG\n".as_slice())); + } + + #[test] + fn multi_file_bytes() { + let content = b"\ +--- a/file1.rs ++++ b/file1.rs +@@ -1 +1 @@ +-old1 ++new1 +--- a/file2.rs ++++ b/file2.rs +@@ -1 +1 @@ +-old2 ++new2 +"; + let patches = PatchSet::parse_bytes(content.as_slice(), ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 2); + } + + #[test] + fn create_file_bytes() { + let content = b"\ +--- /dev/null ++++ b/new.rs +@@ -0,0 +1 @@ ++content +"; + let patches = PatchSet::parse_bytes(content.as_slice(), ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_create()); + assert_eq!( + patches[0].operation(), + &FileOperation::Create(b"b/new.rs".to_vec().into()) + ); + } + + #[test] + fn delete_file_bytes() { + let content = b"\ +--- a/old.rs ++++ /dev/null +@@ -1 +0,0 @@ +-content +"; + let patches = PatchSet::parse_bytes(content.as_slice(), ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_delete()); + assert_eq!( + patches[0].operation(), + &FileOperation::Delete(b"a/old.rs".to_vec().into()) + ); + } +} diff --git a/tests/compat/common.rs b/tests/compat/common.rs index 7bc6f37..60cd2e2 100644 --- a/tests/compat/common.rs +++ b/tests/compat/common.rs @@ -58,7 +58,7 @@ impl<'a> Case<'a> { let case_dir = self.case_dir(); let in_dir = case_dir.join("in"); let patch_path = in_dir.join("foo.patch"); - let patch = fs::read_to_string(&patch_path) + let patch = fs::read(&patch_path) .unwrap_or_else(|e| panic!("failed to read {}: {e}", patch_path.display())); let case_name = self.case_name; @@ -230,22 +230,35 @@ fn copy_input_files_impl(src: &Path, dst: &Path, base: &Path, skip_extensions: & } } +fn bytes_to_path(b: &[u8]) -> &Path { + #[cfg(unix)] + { + use std::os::unix::ffi::OsStrExt; + Path::new(std::ffi::OsStr::from_bytes(b)) + } + #[cfg(not(unix))] + { + // On Windows, falls back to UTF-8 conversion since `OsStr` is WTF-16. + Path::new(std::str::from_utf8(b).expect("non-UTF-8 path not supported on Windows")) + } +} + /// Apply patch using diffy to output directory. pub fn apply_diffy( in_dir: &Path, - patch: &str, + patch: &[u8], output_dir: &Path, opts: ParseOptions, strip_prefix: u32, ) -> Result<(), TestError> { - let patches: Vec<_> = PatchSet::parse(patch, opts) + let patches: Vec<_> = PatchSet::parse_bytes(patch, opts) .collect::>() .map_err(TestError::Parse)?; for file_patch in patches.iter() { let operation = file_patch.operation().strip_prefix(strip_prefix as usize); - let (original_name, target_name) = match &operation { + let (original_name, target_name): (Option<&[u8]>, &[u8]) = match &operation { FileOperation::Create(path) => (None, path.as_ref()), FileOperation::Delete(path) => (Some(path.as_ref()), path.as_ref()), FileOperation::Modify { original, modified } => { @@ -259,21 +272,21 @@ pub fn apply_diffy( match file_patch.patch() { PatchKind::Text(patch) => { let original = if let Some(name) = original_name { - let original_path = in_dir.join(name); - fs::read_to_string(&original_path).unwrap_or_else(|e| { + let original_path = in_dir.join(bytes_to_path(name)); + fs::read(&original_path).unwrap_or_else(|e| { panic!("failed to read {}: {e}", original_path.display()) }) } else { - String::new() + Vec::new() }; - let result = diffy::apply(&original, patch).map_err(TestError::Apply)?; + let result = diffy::apply_bytes(&original, patch).map_err(TestError::Apply)?; - let result_path = output_dir.join(target_name); + let result_path = output_dir.join(bytes_to_path(target_name)); if let Some(parent) = result_path.parent() { fs::create_dir_all(parent).unwrap(); } - fs::write(&result_path, result.as_bytes()).unwrap(); + fs::write(&result_path, &result).unwrap(); } } } diff --git a/tests/compat/gnu_patch/mod.rs b/tests/compat/gnu_patch/mod.rs index fe59fcb..643d16e 100644 --- a/tests/compat/gnu_patch/mod.rs +++ b/tests/compat/gnu_patch/mod.rs @@ -151,6 +151,13 @@ fn junk_between_hunks() { Case::gnu_patch("junk_between_hunks").run(); } +// Patch with non-UTF-8 bytes (0x80, 0xff) in hunk content. +// Both GNU patch and diffy handle raw bytes correctly. +#[test] +fn non_utf8_hunk_content() { + Case::gnu_patch("non_utf8_hunk_content").run(); +} + // Failure cases #[test] diff --git a/tests/compat/gnu_patch/non_utf8_hunk_content/in/file.bin b/tests/compat/gnu_patch/non_utf8_hunk_content/in/file.bin new file mode 100644 index 0000000..5ddb01f --- /dev/null +++ b/tests/compat/gnu_patch/non_utf8_hunk_content/in/file.bin @@ -0,0 +1 @@ +hello €ÿ world diff --git a/tests/compat/gnu_patch/non_utf8_hunk_content/in/foo.patch b/tests/compat/gnu_patch/non_utf8_hunk_content/in/foo.patch new file mode 100644 index 0000000..10e9997 --- /dev/null +++ b/tests/compat/gnu_patch/non_utf8_hunk_content/in/foo.patch @@ -0,0 +1,5 @@ +--- file.bin ++++ file.bin +@@ -1 +1 @@ +-hello €ÿ world ++hello €ÿ universe diff --git a/tests/compat/gnu_patch/non_utf8_hunk_content/out/file.bin b/tests/compat/gnu_patch/non_utf8_hunk_content/out/file.bin new file mode 100644 index 0000000..715e61f --- /dev/null +++ b/tests/compat/gnu_patch/non_utf8_hunk_content/out/file.bin @@ -0,0 +1 @@ +hello €ÿ universe diff --git a/tests/replay.rs b/tests/replay.rs index 7eaa56b..38cddd7 100644 --- a/tests/replay.rs +++ b/tests/replay.rs @@ -99,8 +99,15 @@ impl CatFile { /// Look up an object by `:`. /// /// Returns `None` for submodules, commit/tree/tag object types, and missing objects. - fn get(&mut self, rev: &str, path: &str) -> Option> { - writeln!(self.stdin, "{rev}:{path}").expect("cat-file stdin write failed"); + fn get(&mut self, rev: &str, path: &[u8]) -> Option> { + // `git cat-file :` accepts raw bytes + let mut query = rev.as_bytes().to_vec(); + query.push(b':'); + query.extend_from_slice(path); + query.push(b'\n'); + self.stdin + .write_all(&query) + .expect("cat-file stdin write failed"); let mut header = String::new(); self.stdout @@ -150,11 +157,6 @@ impl CatFile { Some(buf) } - - /// Like [`CatFile::get`] but returns only UTF-8 string. - fn get_text(&mut self, rev: &str, path: &str) -> Option { - self.get(rev, path).and_then(|b| String::from_utf8(b).ok()) - } } /// Local enum for test configuration (maps to ParseOptions). @@ -232,7 +234,7 @@ fn test_mode() -> TestMode { } } -fn git(repo: &Path, args: &[&str]) -> String { +fn git_bytes(repo: &Path, args: &[&str]) -> Vec { let mut cmd = Command::new("git"); cmd.env("GIT_CONFIG_NOSYSTEM", "1"); cmd.env("GIT_CONFIG_GLOBAL", "/dev/null"); @@ -246,7 +248,11 @@ fn git(repo: &Path, args: &[&str]) -> String { panic!("git {args:?} failed: {stderr}"); } - String::from_utf8_lossy(&output.stdout).into_owned() + output.stdout +} + +fn git(repo: &Path, args: &[&str]) -> String { + String::from_utf8_lossy(&git_bytes(repo, args)).into_owned() } /// Get the list of commits from oldest to newest. @@ -324,7 +330,7 @@ fn process_commit( // UniDiff format cannot express pure renames (no ---/+++ headers). // Use `--no-renames` to represent them as delete + create instead. let diff_output = match mode { - TestMode::UniDiff => git(repo, &["diff", "--no-renames", parent, child]), + TestMode::UniDiff => git_bytes(repo, &["diff", "--no-renames", parent, child]), }; if diff_output.is_empty() { @@ -385,9 +391,10 @@ fn process_commit( }; } - let patchset: Vec<_> = match PatchSet::parse(&diff_output, mode.into()).collect() { + let patchset: Vec<_> = match PatchSet::parse_bytes(&diff_output, mode.into()).collect() { Ok(ps) => ps, Err(e) => { + let diff_output = String::from_utf8_lossy(&diff_output); panic!( "Failed to parse patch for {parent_short}..{child_short}: {e}\n\n\ Diff:\n{diff_output}" @@ -399,6 +406,7 @@ fn process_commit( // This catches both missing and spurious patches. if patchset.len() != expected_file_count { let n = patchset.len(); + let diff_output = String::from_utf8_lossy(&diff_output); panic!( "Patch count mismatch for {parent_short}..{child_short}: \ expected {expected_file_count} files, parsed {n} patches\n\n\ @@ -416,68 +424,102 @@ fn process_commit( }; let operation = operation.strip_prefix(strip); - let (base_path, target_path, desc): (Option<&str>, Option<&str>, _) = match &operation { - FileOperation::Create(path) => (None, Some(path.as_ref()), format!("create {path}")), - FileOperation::Delete(path) => (Some(path.as_ref()), None, format!("delete {path}")), + let (base_path, target_path, desc): (Option<&[u8]>, Option<&[u8]>, _) = match &operation { + FileOperation::Create(path) => { + let p = path.as_ref(); + ( + None, + Some(p), + format!("create {}", String::from_utf8_lossy(p)), + ) + } + FileOperation::Delete(path) => { + let p = path.as_ref(); + ( + Some(p), + None, + format!("delete {}", String::from_utf8_lossy(p)), + ) + } FileOperation::Modify { original, modified } => { - let desc = if original == modified { - format!("modify {original}") + let (o, m) = (original.as_ref(), modified.as_ref()); + let desc = if o == m { + format!("modify {}", String::from_utf8_lossy(o)) } else { - format!("modify {original} -> {modified}") + format!( + "modify {} -> {}", + String::from_utf8_lossy(o), + String::from_utf8_lossy(m), + ) }; - (Some(original.as_ref()), Some(modified.as_ref()), desc) + (Some(o), Some(m), desc) + } + FileOperation::Rename { from, to } => { + let (f, t) = (from.as_ref(), to.as_ref()); + ( + Some(f), + Some(t), + format!( + "rename {} -> {}", + String::from_utf8_lossy(f), + String::from_utf8_lossy(t), + ), + ) + } + FileOperation::Copy { from, to } => { + let (f, t) = (from.as_ref(), to.as_ref()); + ( + Some(f), + Some(t), + format!( + "copy {} -> {}", + String::from_utf8_lossy(f), + String::from_utf8_lossy(t), + ), + ) } - FileOperation::Rename { from, to } => ( - Some(from.as_ref()), - Some(to.as_ref()), - format!("rename {from} -> {to}"), - ), - FileOperation::Copy { from, to } => ( - Some(from.as_ref()), - Some(to.as_ref()), - format!("copy {from} -> {to}"), - ), }; match file_patch.patch() { PatchKind::Text(patch) => { let base_content = if let Some(path) = base_path { - let Some(content) = cat.get_text(parent, path) else { + let Some(content) = cat.get(parent, path) else { skipped += 1; continue; }; content } else { - String::new() + Vec::new() }; let expected_content = if let Some(path) = target_path { - let Some(content) = cat.get_text(child, path) else { + let Some(content) = cat.get(child, path) else { skipped += 1; continue; }; content } else { - String::new() + Vec::new() }; - let result = match diffy::apply(&base_content, patch) { + let result = match diffy::apply_bytes(&base_content, patch) { Ok(r) => r, Err(e) => { + let base_content = String::from_utf8_lossy(&base_content); panic!( "Failed to apply patch at {parent_short}..{child_short} for {desc}: {e}\n\n\ - Patch:\n{patch}\n\n\ Base content:\n{base_content}" ); } }; if result != expected_content { + let expected_content = String::from_utf8_lossy(&expected_content); + let result = String::from_utf8_lossy(&result); panic!( "Content mismatch at {parent_short}..{child_short} for {desc}\n\n\ --- Expected ---\n{expected_content}\n\n\ - --- Got ---\n{result}\n\n\ - --- Patch ---\n{patch}" + --- Got ---\n{result}" ); } }