diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1c82923..9bbd1886 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,13 +23,14 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - rust: [stable, beta, nightly, 1.70.0] + rust: [stable, beta, nightly, 1.75.0] steps: - uses: actions/checkout@v6 - run: rustup toolchain install ${{ matrix.rust }} --profile minimal - run: cargo +${{ matrix.rust }} check --all-targets --all-features - run: cargo +${{ matrix.rust }} test + - run: cargo +${{ matrix.rust }} test -F binary lint: runs-on: ubuntu-latest diff --git a/.github/workflows/replay.yml b/.github/workflows/replay.yml index a9f8f997..48368fce 100644 --- a/.github/workflows/replay.yml +++ b/.github/workflows/replay.yml @@ -44,7 +44,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - parse_mode: [unidiff] + parse_mode: [unidiff, gitdiff] name: ${{ inputs.name && matrix.parse_mode || format('{0} ({1}, {2})', inputs.repo_url, matrix.parse_mode, inputs.commits) }} steps: - uses: actions/checkout@v6 @@ -67,7 +67,7 @@ jobs: exit 1 fi - run: rustup toolchain install stable --profile minimal - - run: cargo test --release --test replay -- --ignored --nocapture + - run: cargo test --release --test replay -F binary -- --ignored --nocapture env: DIFFY_TEST_REPO: ${{ inputs.repo_url == '' && '.' || 'target/test-repo' }} DIFFY_TEST_COMMITS: ${{ inputs.commits }} diff --git a/Cargo.lock b/Cargo.lock index ad290e3b..c726b1cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "anstream" version = "0.6.21" @@ -109,6 +115,7 @@ name = "diffy" version = "0.4.2" dependencies = [ "anstyle", + "flate2", "rayon", "snapbox", ] @@ -152,6 +159,16 @@ dependencies = [ "libredox", ] +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "miniz_oxide", + "zlib-rs", +] + [[package]] name = "getrandom" version = "0.3.4" @@ -200,6 +217,16 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -281,6 +308,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + [[package]] name = "similar" version = "2.7.0" @@ -446,3 +479,9 @@ name = "wit-bindgen" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" + +[[package]] +name = "zlib-rs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" diff --git a/Cargo.toml b/Cargo.toml index 25c6e518..125a3c72 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,14 +9,16 @@ repository = "https://github.com/bmwill/diffy" readme = "README.md" keywords = ["diff", "patch", "merge"] categories = ["text-processing"] -rust-version = "1.70.0" +rust-version = "1.75.0" edition = "2021" [features] +binary = ["dep:flate2"] color = ["dep:anstyle"] [dependencies] anstyle = { version = "1.0.13", optional = true } +flate2 = { version = "1.1.9", optional = true, default-features = false, features = ["zlib-rs"] } [dev-dependencies] rayon = "1.10.0" @@ -25,3 +27,11 @@ snapbox = { version = "0.6.24", features = ["dir"] } [[example]] name = "patch_formatter" required-features = ["color"] + +[[test]] +name = "compat" +required-features = ["binary"] + +[[test]] +name = "replay" +required-features = ["binary"] diff --git a/deny.toml b/deny.toml index d495b7c5..740f35a3 100644 --- a/deny.toml +++ b/deny.toml @@ -91,6 +91,7 @@ ignore = [ allow = [ "MIT", "Apache-2.0", + "Zlib", #"Apache-2.0 WITH LLVM-exception", ] # The confidence threshold for detecting a license from license text. diff --git a/src/binary/base85.rs b/src/binary/base85.rs new file mode 100644 index 00000000..182efe87 --- /dev/null +++ b/src/binary/base85.rs @@ -0,0 +1,229 @@ +//! Base85 encoding and decoding using the character set defined in [RFC 1924]. +//! +//! ## References +//! +//! * [RFC 1924] +//! * [Wikipedia: Ascii85 § RFC 1924 version](https://en.wikipedia.org/wiki/Ascii85#RFC_1924_version) +//! +//! [RFC 1924]: https://datatracker.ietf.org/doc/html/rfc1924 + +use std::fmt; + +/// Base85 character set (RFC 1924). +const ALPHABET: &[u8; 85] = b"0123456789\ + ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ + !#$%&()*+-;<=>?@^_`{|}~"; + +/// Pre-computed lookup table for Base85 decoding. +/// +/// Maps ASCII byte value → digit value or `0xFF` for invalid characters. +/// This provides O(1) lookup. +const TABLE: [u8; 256] = { + let mut table = [0xFFu8; 256]; + let mut i = 0usize; + while i < 85 { + table[ALPHABET[i] as usize] = i as u8; + i += 1; + } + table +}; + +/// Error type for Base85 operations. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Base85Error { + /// Invalid character that is not in RFC 1924 alphabet. + InvalidCharacter(char), + /// Invalid input length for the operation. + InvalidLength, +} + +impl fmt::Display for Base85Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Base85Error::InvalidCharacter(c) => write!(f, "invalid base85 character: {:?}", c), + Base85Error::InvalidLength => write!(f, "invalid input length"), + } + } +} + +impl std::error::Error for Base85Error {} + +/// Decodes a Base85 string to the provided output. +/// +/// ## Limitations +/// +/// The input length must be a multiple of 5. +/// +/// This function does not handle padding for partial chunks. +/// When decoding data where the original byte count isn't a multiple of 4, +/// callers must handle truncation at a higher level. +/// For example, via a length indicator in Git binary patch. +pub fn decode_into(input: &str, output: &mut impl Extend) -> Result<(), Base85Error> { + let bytes = input.as_bytes(); + + if bytes.len() % 5 != 0 { + return Err(Base85Error::InvalidLength); + } + + // TODO: Use `as_chunks::<5>()` when MSRV >= 1.88 + for chunk in bytes.chunks_exact(5) { + let mut value: u32 = 0; + for &byte in chunk { + let digit = TABLE[byte as usize]; + if digit == 0xFF { + return Err(Base85Error::InvalidCharacter(byte as char)); + } + value = value * 85 + digit as u32; + } + + output.extend(value.to_be_bytes()); + } + + Ok(()) +} + +/// Encodes bytes in Base85 to the provided output. +/// +/// ## Limitations +/// +/// The input length must be a multiple of 4. +/// +/// This function does not handle padding for partial chunks. +/// Callers encoding data where the byte count isn't a multiple of 4 +/// must handle padding at a higher level. +/// For example, via a length indicator in Git binary patch format. +#[allow(dead_code)] // will be used for patch formatting +pub fn encode_into(input: &[u8], output: &mut impl Extend) -> Result<(), Base85Error> { + if input.len() % 4 != 0 { + return Err(Base85Error::InvalidLength); + } + + // TODO: Use `as_chunks::<4>()` when MSRV >= 1.88 + for chunk in input.chunks_exact(4) { + let mut value = u32::from_be_bytes(chunk.try_into().unwrap()); + + // Extract 5 base85 digits (least to most significant order) + let mut digits = [0u8; 5]; + for digit in digits.iter_mut().rev() { + *digit = ALPHABET[(value % 85) as usize]; + value /= 85; + } + output.extend(digits.iter().map(|&b| b as char)); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn decode(input: &str) -> Result, Base85Error> { + let mut result = Vec::with_capacity((input.len() / 5) * 4); + decode_into(input, &mut result)?; + Ok(result) + } + + fn encode(input: &[u8]) -> Result { + let mut result = String::with_capacity((input.len() / 4) * 5); + encode_into(input, &mut result)?; + Ok(result) + } + + const TEST_VECTORS: &[(&[u8], &str)] = &[ + (b"", ""), + (&[0x00, 0x00, 0x00, 0x00], "00000"), + (&[0xff, 0xff, 0xff, 0xff], "|NsC0"), + // Rust ecosystem phrases + (b"Rust", "Qgw55"), + (b"Fearless concurrency", "MrC1gY-MwEAY*TCV|8+JWo~16"), + (b"memory safe!", "ZDnn5a(N(gVP<6^"), + (b"blazing fast", "Vr*f0X>MmAW?^%5"), + ( + b"zero-cost abstraction!??", + "dS!BNEn{zUbRc13b98cHV{~b6ZXrKE", + ), + ]; + + #[test] + fn table_covers_all_alphabet_chars() { + for (i, &c) in ALPHABET.iter().enumerate() { + assert_eq!( + TABLE[c as usize], i as u8, + "mismatch for char '{}' at index {}", + c as char, i + ); + } + } + + #[test] + fn table_rejects_invalid_chars() { + let invalid_chars = b" \t\n\r\"'\\[],:"; + for &c in invalid_chars { + assert_eq!( + TABLE[c as usize], 0xFF, + "char '{}' should be invalid", + c as char + ); + } + } + + #[test] + fn decode_test_vectors() { + for (bytes, encoded) in TEST_VECTORS { + let result = decode(encoded).unwrap(); + assert_eq!(&result, *bytes, "decode({:?}) failed", encoded); + } + } + + #[test] + fn encode_test_vectors() { + for (bytes, encoded) in TEST_VECTORS { + let result = encode(bytes).unwrap(); + assert_eq!(result, *encoded, "encode({:?}) failed", bytes); + } + } + + #[test] + fn decode_invalid_length() { + assert!(matches!(decode("0000"), Err(Base85Error::InvalidLength))); + assert!(matches!(decode("000"), Err(Base85Error::InvalidLength))); + assert!(matches!(decode("00"), Err(Base85Error::InvalidLength))); + assert!(matches!(decode("0"), Err(Base85Error::InvalidLength))); + } + + #[test] + fn decode_invalid_character() { + assert!(matches!( + decode("0000 "), + Err(Base85Error::InvalidCharacter(' ')) + )); + assert!(matches!( + decode("0000\""), + Err(Base85Error::InvalidCharacter('"')) + )); + } + + #[test] + fn encode_invalid_length() { + assert!(matches!(encode(&[0]), Err(Base85Error::InvalidLength))); + assert!(matches!(encode(&[0, 0]), Err(Base85Error::InvalidLength))); + assert!(matches!( + encode(&[0, 0, 0]), + Err(Base85Error::InvalidLength) + )); + assert!(matches!( + encode(&[0, 0, 0, 0, 0]), + Err(Base85Error::InvalidLength) + )); + } + + #[test] + fn round_trip() { + for (bytes, _) in TEST_VECTORS { + let encoded = encode(bytes).unwrap(); + let decoded = decode(&encoded).unwrap(); + assert_eq!(&decoded, *bytes, "round-trip failed for {:?}", bytes); + } + } +} diff --git a/src/binary/delta.rs b/src/binary/delta.rs new file mode 100644 index 00000000..fa1de268 --- /dev/null +++ b/src/binary/delta.rs @@ -0,0 +1,362 @@ +//! Git delta binary diff support. +//! +//! A delta payload contains: +//! +//! 1. Header: variable-length encoded sizes (original_size, modified_size) +//! 2. Instructions: sequence of `ADD` and `COPY` operations +//! +//! Based on Diffx's [Git Delta Binary Diffs](https://diffx.org/spec/binary-diffs.html#git-delta-binary-diffs) + +use std::fmt; + +/// Applies delta instructions to an original file, producing the modified file. +pub fn apply(original: &[u8], delta: &[u8]) -> Result, DeltaError> { + let mut cursor = DeltaCursor::new(delta); + + let header_orig_size = cursor.read_size()?; + let header_mod_size = cursor.read_size()?; + + // Validate original size + if original.len() as u64 != header_orig_size { + return Err(DeltaError::OriginalSizeMismatch { + expected: header_orig_size, + actual: original.len() as u64, + }); + } + + let mut result = Vec::with_capacity(header_mod_size as usize); + + // Process instructions until we've consumed all delta data + while !cursor.is_empty() { + let control = cursor.read_byte()?; + + if control & 0x80 != 0 { + // COPY instruction + let (src_offset, copy_len) = cursor.read_copy_params(control)?; + let src_end = src_offset + .checked_add(copy_len) + .ok_or(DeltaError::InvalidCopyRange)?; + + if src_end > original.len() { + return Err(DeltaError::CopyOutOfBounds { + offset: src_offset, + length: copy_len, + original_size: original.len(), + }); + } + + result.extend_from_slice(&original[src_offset..src_end]); + } else { + // ADD instruction + let add_len = control as usize; + let data = cursor.read_bytes(add_len)?; + result.extend_from_slice(data); + } + } + + // Validate result size + if result.len() as u64 != header_mod_size { + return Err(DeltaError::ModifiedSizeMismatch { + expected: header_mod_size, + actual: result.len() as u64, + }); + } + + Ok(result) +} + +/// Cursor for reading delta instructions. +struct DeltaCursor<'a> { + data: &'a [u8], + offset: usize, +} + +impl<'a> DeltaCursor<'a> { + fn new(data: &'a [u8]) -> Self { + Self { data, offset: 0 } + } + + fn is_empty(&self) -> bool { + self.offset >= self.data.len() + } + + fn read_byte(&mut self) -> Result { + if self.offset >= self.data.len() { + return Err(DeltaError::UnexpectedEof); + } + let byte = self.data[self.offset]; + self.offset += 1; + Ok(byte) + } + + fn read_bytes(&mut self, len: usize) -> Result<&'a [u8], DeltaError> { + let end = self + .offset + .checked_add(len) + .ok_or(DeltaError::UnexpectedEof)?; + if end > self.data.len() { + return Err(DeltaError::UnexpectedEof); + } + let bytes = &self.data[self.offset..end]; + self.offset = end; + Ok(bytes) + } + + /// Reads a variable-length encoded size from the header. + /// + /// Format: each byte uses 7 bits for value, MSB indicates continuation. + /// Bytes are in little-endian order (LSB first). + fn read_size(&mut self) -> Result { + let mut file_len: u64 = 0; + let mut shift: u32 = 0; + + loop { + let byte = self.read_byte()?; + + // Add 7 bits of value at current shift position + let value = (byte & 0x7F) as u64; + file_len |= value.checked_shl(shift).ok_or(DeltaError::SizeOverflow)?; + + // MSB clear means this is the last byte + if byte & 0x80 == 0 { + break; + } + + shift += 7; + } + + Ok(file_len) + } + + /// Reads `COPY` instruction parameters from the control byte. + /// Returns `(src_offset, copy_len)`. + /// + /// Control byte format is `1oooosss`: + /// + /// * Bits 0-3: src_offset bytes + /// * Bits 4-6: copy_len bytes + fn read_copy_params(&mut self, control: u8) -> Result<(usize, usize), DeltaError> { + let mut src_offset: u32 = 0; + for (mask, shift) in [(0x01, 0), (0x02, 8), (0x04, 16), (0x08, 24)] { + if control & mask != 0 { + let byte = self.read_byte()? as u32; + src_offset |= byte.checked_shl(shift).ok_or(DeltaError::SizeOverflow)?; + } + } + + let mut copy_len: u32 = 0; + for (mask, shift) in [(0x10, 0), (0x20, 8), (0x40, 16)] { + if control & mask != 0 { + let byte = self.read_byte()? as u32; + copy_len |= byte.checked_shl(shift).ok_or(DeltaError::SizeOverflow)?; + } + } + + if copy_len == 0 { + // Size of 0 means 65536 + copy_len = 0x10000; + } + + Ok((src_offset as usize, copy_len as usize)) + } +} + +/// Error type for delta operations. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DeltaError { + /// Unexpected end of delta data. + UnexpectedEof, + /// Size value overflowed during decoding. + SizeOverflow, + /// Original file size doesn't match header. + OriginalSizeMismatch { expected: u64, actual: u64 }, + /// Modified file size doesn't match header. + ModifiedSizeMismatch { expected: u64, actual: u64 }, + /// COPY instruction references out-of-bounds data. + CopyOutOfBounds { + offset: usize, + length: usize, + original_size: usize, + }, + /// COPY range calculation overflowed. + InvalidCopyRange, +} + +impl fmt::Display for DeltaError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + DeltaError::UnexpectedEof => write!(f, "unexpected end of delta data"), + DeltaError::SizeOverflow => write!(f, "size value overflow"), + DeltaError::OriginalSizeMismatch { expected, actual } => { + write!( + f, + "original size mismatch: expected {expected}, got {actual}" + ) + } + DeltaError::ModifiedSizeMismatch { expected, actual } => { + write!( + f, + "modified size mismatch: expected {expected}, got {actual}" + ) + } + DeltaError::CopyOutOfBounds { + offset, + length, + original_size, + } => { + write!( + f, + "copy out of bounds: offset={offset}, length={length}, original_size={original_size}" + ) + } + DeltaError::InvalidCopyRange => write!(f, "copy range calculation overflow"), + } + } +} + +impl std::error::Error for DeltaError {} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn read_size_single_byte() { + // 0x0A = 10, MSB clear = end + let data = [0x0A]; + let mut cursor = DeltaCursor::new(&data); + assert_eq!(cursor.read_size().unwrap(), 10); + } + + #[test] + fn read_size_multi_byte() { + // 0x80 | 0x01 = 1, continue; 0x02 = 2 << 7 = 256; total = 257 + let data = [0x81, 0x02]; + let mut cursor = DeltaCursor::new(&data); + assert_eq!(cursor.read_size().unwrap(), 1 + (2 << 7)); + } + + #[test] + fn apply_add_only() { + // Header: orig_size=0, mod_size=5 + // ADD 5 bytes: "hello" + let delta = [ + 0x00, // orig_size = 0 + 0x05, // mod_size = 5 + 0x05, // ADD 5 bytes + b'h', b'e', b'l', b'l', b'o', + ]; + let result = apply(&[], &delta).unwrap(); + assert_eq!(result, b"hello"); + } + + #[test] + fn apply_copy_only() { + // Header: orig_size=5, mod_size=5 + // COPY offset=0, len=5 + let delta = [ + 0x05, // orig_size = 5 + 0x05, // mod_size = 5 + 0x90, // COPY: control=0x90 (0x80 | 0x10), offset=0, size byte present + 0x05, // size = 5 + ]; + let original = b"hello"; + let result = apply(original, &delta).unwrap(); + assert_eq!(result, b"hello"); + } + + #[test] + fn apply_copy_with_offset() { + // Header: orig_size=10, mod_size=5 + // COPY offset=5, len=5 + let delta = [ + 0x0A, // orig_size = 10 + 0x05, // mod_size = 5 + 0x91, // COPY: 0x80 | 0x10 | 0x01 (offset1 + size1 present) + 0x05, // offset = 5 + 0x05, // size = 5 + ]; + let original = b"helloworld"; + let result = apply(original, &delta).unwrap(); + assert_eq!(result, b"world"); + } + + #[test] + fn apply_mixed_instructions() { + // Create "HELLO world" from "hello world" + // Header: orig_size=11, mod_size=11 + // ADD 5: "HELLO" + // COPY offset=5, len=6: " world" + let delta = [ + 0x0B, // orig_size = 11 + 0x0B, // mod_size = 11 + 0x05, // ADD 5 bytes + b'H', b'E', b'L', b'L', b'O', // "HELLO" + 0x91, // COPY: offset1 + size1 present + 0x05, // offset = 5 + 0x06, // size = 6 + ]; + let original = b"hello world"; + let result = apply(original, &delta).unwrap(); + assert_eq!(result, b"HELLO world"); + } + + #[test] + fn apply_copy_size_zero_means_65536() { + // When size bytes result in 0, it means 65536 + // Header: orig_size=65536, mod_size=65536 + // COPY offset=0, len=65536 (encoded as 0) + let original = vec![0xAB; 65536]; + let delta = [ + 0x80, 0x80, 0x04, // orig_size = 65536 (varint) + 0x80, 0x80, 0x04, // mod_size = 65536 (varint) + 0x80, // COPY: no offset bytes, no size bytes = offset 0, size 65536 + ]; + let result = apply(&original, &delta).unwrap(); + assert_eq!(result.len(), 65536); + assert_eq!(result, original); + } + + #[test] + fn error_original_size_mismatch() { + let delta = [ + 0x0A, // orig_size = 10 + 0x05, // mod_size = 5 + ]; + let original = b"short"; // only 5 bytes + let err = apply(original, &delta).unwrap_err(); + assert!(matches!( + err, + DeltaError::OriginalSizeMismatch { + expected: 10, + actual: 5 + } + )); + } + + #[test] + fn error_copy_out_of_bounds() { + let delta = [ + 0x05, // orig_size = 5 + 0x05, // mod_size = 5 + 0x91, // COPY + 0x0A, // offset = 10 (out of bounds!) + 0x05, // size = 5 + ]; + let original = b"hello"; + let err = apply(original, &delta).unwrap_err(); + assert!(matches!(err, DeltaError::CopyOutOfBounds { .. })); + } + + #[test] + fn error_unexpected_eof_in_add() { + let delta = [ + 0x00, // orig_size = 0 + 0x05, // mod_size = 5 + 0x05, // ADD 5 bytes + b'h', b'i', // only 2 bytes provided + ]; + let err = apply(&[], &delta).unwrap_err(); + assert_eq!(err, DeltaError::UnexpectedEof); + } +} diff --git a/src/binary/mod.rs b/src/binary/mod.rs new file mode 100644 index 00000000..c1c7ab4d --- /dev/null +++ b/src/binary/mod.rs @@ -0,0 +1,562 @@ +//! Git binary diffs support. +//! +//! This module provides parsing and decoding for Git's binary diff format, +//! as generated by `git diff --binary` or `git format-patch --binary`. +//! +//! Based on [DiffX Binary Diffs specification](https://diffx.org/spec/binary-diffs.html). + +#[cfg(feature = "binary")] +mod base85; +#[cfg(feature = "binary")] +mod delta; + +use std::{fmt, ops::Range}; + +/// The type of a binary patch block. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BinaryBlockKind { + /// [Literal](https://diffx.org/spec/binary-diffs.html#git-literal-binary-diffs): + /// contains the full file content, zlib-compressed and Base85-encoded. + Literal, + /// [Delta](https://diffx.org/spec/binary-diffs.html#git-delta-binary-diffs): + /// contains delta instructions to transform one file into another. + Delta, +} + +/// A single block in a binary patch. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BinaryBlock<'a> { + /// The type of this block (literal content or delta instructions). + pub kind: BinaryBlockKind, + /// The encoded data. + pub data: BinaryData<'a>, +} + +/// A parsed binary patch. +/// +/// A binary patch contains encoded binary data that can be decoded +/// to recover the original and modified file contents. +/// +/// Git may use different encodings for each direction: +/// +/// - `literal`: full file content +/// - `delta`: instructions to transform one file into another +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum BinaryPatch<'a> { + /// A full binary patch with forward and reverse data. + /// + /// The forward block transforms original -> modified. + /// The reverse block transforms modified -> original. + /// + /// Each block can independently be either `literal` or `delta`. + Full { + /// Forward transformation (original -> modified). + forward: BinaryBlock<'a>, + /// Reverse transformation (modified -> original). + reverse: BinaryBlock<'a>, + }, + /// A Git binary diff marker. + /// + /// This represents the `Binary files a/path and b/path differ` case, + /// where git detected a binary change but didn't include the actual data. + /// + /// Calling [`apply()`](Self::apply) on this variant returns an error. + Marker, +} + +impl<'a> BinaryPatch<'a> { + /// Applies a binary patch forward: original -> modified. + /// + /// - If the forward block is `Literal`: returns the decoded content directly. + /// - If the forward block is `Delta`: applies delta instructions to `original`. + /// + /// Unlike `git apply`, this doesn't validate the original content hash. + #[cfg(feature = "binary")] + pub fn apply(&self, original: &[u8]) -> Result, BinaryPatchParseError> { + match self { + BinaryPatch::Full { forward, .. } => Self::apply_block(forward, original), + BinaryPatch::Marker => Err(BinaryPatchParseErrorKind::NoBinaryData.into()), + } + } + + /// Applies a binary patch in reverse: modified -> original. + /// + /// - If the reverse block is `Literal`: returns the decoded content directly. + /// - If the reverse block is `Delta`: applies delta instructions to `modified`. + /// + /// Unlike `git apply`, this doesn't validate the modified content hash. + #[cfg(feature = "binary")] + pub fn apply_reverse(&self, modified: &[u8]) -> Result, BinaryPatchParseError> { + match self { + BinaryPatch::Full { reverse, .. } => Self::apply_block(reverse, modified), + BinaryPatch::Marker => Err(BinaryPatchParseErrorKind::NoBinaryData.into()), + } + } + + /// Applies a single block (either literal or delta). + #[cfg(feature = "binary")] + fn apply_block(block: &BinaryBlock<'_>, base: &[u8]) -> Result, BinaryPatchParseError> { + match block.kind { + BinaryBlockKind::Literal => Self::decode_data(&block.data), + BinaryBlockKind::Delta => { + let delta_instructions = Self::decode_data(&block.data)?; + delta::apply(base, &delta_instructions).map_err(BinaryPatchParseError::from) + } + } + } + + /// See [Decoding Logic](https://diffx.org/spec/binary-diffs.html#decoding-logic) + #[cfg(feature = "binary")] + fn decode_data(binary_data: &BinaryData<'_>) -> Result, BinaryPatchParseError> { + use std::io::Read; + + let compressed = decode_base85_lines(binary_data.data)?; + + let mut decoder = flate2::read::ZlibDecoder::new(&compressed[..]); + let mut decompressed = Vec::new(); + decoder + .read_to_end(&mut decompressed) + .map_err(|e| BinaryPatchParseErrorKind::DecompressionFailed(e.to_string()))?; + + Ok(decompressed) + } +} + +/// Represents a single binary payload in a Git binary diff. +/// +/// For example, the following patch block +/// +/// * is parsed as `BinaryData { size: 10, data: "UcmV+l0QLU>0RjUA1qKHQ2>\`DEE&u=k" }` +/// * The line starts with a length indicator (`U` = 21 decoded bytes) +/// +/// +/// ```text +/// literal 10 +/// UcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k +/// ``` +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BinaryData<'a> { + /// Uncompressed size in bytes. + pub size: u64, + /// Raw Base85 lines with length indicators. + pub data: &'a str, +} + +/// Error type for binary patch operations. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BinaryPatchParseError { + pub(crate) kind: BinaryPatchParseErrorKind, + span: Option>, +} + +impl BinaryPatchParseError { + /// Creates a new error with the given kind and span. + pub(crate) fn new(kind: BinaryPatchParseErrorKind, span: Range) -> Self { + Self { + kind, + span: Some(span), + } + } + + /// Returns the byte range in the input where the error occurred. + pub fn span(&self) -> Option> { + self.span.clone() + } +} + +impl fmt::Display for BinaryPatchParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(span) = &self.span { + write!( + f, + "error parsing binary patch at byte {}: {}", + span.start, self.kind + ) + } else { + write!(f, "error parsing binary patch: {}", self.kind) + } + } +} + +impl std::error::Error for BinaryPatchParseError {} + +#[cfg(feature = "binary")] +impl From for BinaryPatchParseError { + fn from(e: base85::Base85Error) -> Self { + BinaryPatchParseErrorKind::Base85(e).into() + } +} + +#[cfg(feature = "binary")] +impl From for BinaryPatchParseError { + fn from(e: delta::DeltaError) -> Self { + BinaryPatchParseErrorKind::Delta(e).into() + } +} + +impl From for BinaryPatchParseError { + fn from(kind: BinaryPatchParseErrorKind) -> Self { + Self { kind, span: None } + } +} + +/// The kind of error that occurred when parsing a binary patch. +#[derive(Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub(crate) enum BinaryPatchParseErrorKind { + /// Missing or invalid "GIT binary patch" header. + // TODO: Switch to #[expect(dead_code)] when MSRV >= 1.81 + #[cfg_attr(not(feature = "binary"), allow(dead_code))] + InvalidHeader, + + /// First binary block (forward) not found. + MissingForwardBlock, + + /// Second binary block (reverse) not found. + MissingReverseBlock, + + /// No binary data available (marker-only patch). + // TODO: Switch to #[expect(dead_code)] when MSRV >= 1.81 + #[cfg_attr(not(feature = "binary"), allow(dead_code))] + NoBinaryData, + + /// Invalid line length indicator in Base85 data. + // TODO: Switch to #[expect(dead_code)] when MSRV >= 1.81 + #[cfg_attr(not(feature = "binary"), allow(dead_code))] + InvalidLineLengthIndicator, + + /// Base85 decoding failed. + #[cfg(feature = "binary")] + Base85(base85::Base85Error), + + /// Delta application failed. + #[cfg(feature = "binary")] + Delta(delta::DeltaError), + + /// Zlib decompression failed. + #[cfg(feature = "binary")] + DecompressionFailed(String), +} + +impl fmt::Display for BinaryPatchParseErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::InvalidHeader => write!(f, "invalid binary patch header"), + Self::MissingForwardBlock => write!(f, "first binary block not found"), + Self::MissingReverseBlock => write!(f, "second binary block not found"), + Self::NoBinaryData => write!(f, "no binary data available"), + Self::InvalidLineLengthIndicator => write!(f, "invalid line length indicator"), + #[cfg(feature = "binary")] + Self::Base85(e) => write!(f, "{e}"), + #[cfg(feature = "binary")] + Self::Delta(e) => write!(f, "{e}"), + #[cfg(feature = "binary")] + Self::DecompressionFailed(msg) => write!(f, "decompression failed: {msg}"), + } + } +} + +/// Simple streaming parser for binary patches using lines() iterator. +struct BinaryParser<'a> { + input: &'a str, + offset: usize, +} + +impl<'a> BinaryParser<'a> { + fn new(input: &'a str) -> Self { + Self { input, offset: 0 } + } + + fn peek_line(&self) -> Option<&'a str> { + self.input[self.offset..].lines().next() + } + + /// Creates an error with the current offset as span. + fn error(&self, kind: BinaryPatchParseErrorKind) -> BinaryPatchParseError { + BinaryPatchParseError::new(kind, self.offset..self.offset) + } + + fn next_line(&mut self) -> Option<&'a str> { + let rest = &self.input[self.offset..]; + let line = rest.lines().next()?; + self.offset += line.len(); + + // Skip line ending (CRLF or LF) + let remaining = &self.input[self.offset..]; + if remaining.starts_with("\r\n") { + self.offset += 2; + } else if remaining.starts_with('\n') { + self.offset += 1; + } + + Some(line) + } + + fn slice_from(&self, start_offset: usize) -> &'a str { + let end = self.offset; + // Strip trailing line ending before blank line + let slice = &self.input[start_offset..end]; + slice + .strip_suffix("\r\n") + .or_else(|| slice.strip_suffix('\n')) + .unwrap_or(slice) + } +} + +/// Parses binary patch content after git extended headers. +/// +/// Expects input starting with "GIT binary patch" line. +/// Returns the parsed patch and the number of bytes consumed. +/// +/// Format: +/// +/// ```text +/// GIT binary patch +/// +/// +/// +/// +/// +/// ``` +pub(crate) fn parse_binary_patch( + input: &str, +) -> Result<(BinaryPatch<'_>, usize), BinaryPatchParseError> { + let mut parser = BinaryParser::new(input); + + // Expect "GIT binary patch" marker + if parser.next_line() != Some("GIT binary patch") { + return Err(parser.error(BinaryPatchParseErrorKind::InvalidHeader)); + } + + // Parse first block (forward: original -> modified) + let Some(forward) = parse_binary_block(&mut parser) else { + return Err(parser.error(BinaryPatchParseErrorKind::MissingForwardBlock)); + }; + + // Parse second block (reverse: modified -> original) + let Some(reverse) = parse_binary_block(&mut parser) else { + return Err(parser.error(BinaryPatchParseErrorKind::MissingReverseBlock)); + }; + + Ok((BinaryPatch::Full { forward, reverse }, parser.offset)) +} + +/// Parses a single binary block. +/// +/// Returns a `BinaryBlock` with kind (literal/delta) and data. +fn parse_binary_block<'a>(parser: &mut BinaryParser<'a>) -> Option> { + // Parse "literal 10" or "delta 18" + let format_line = parser.next_line()?; + let (patch_type, size_str) = format_line.split_once(' ')?; + let size: u64 = size_str.parse().ok()?; + + let kind = match patch_type { + "literal" => BinaryBlockKind::Literal, + "delta" => BinaryBlockKind::Delta, + _ => return None, + }; + + // Record start of Base85 data + let data_start = parser.offset; + + // Consume Base85 lines until blank line + while let Some(line) = parser.peek_line() { + if line.is_empty() { + parser.next_line(); // Consume blank line + break; + } + parser.next_line(); + } + + let data = parser.slice_from(data_start); + + Some(BinaryBlock { + kind, + data: BinaryData { size, data }, + }) +} + +/// Decodes multi-line Base85 data with length indicators. +/// +/// Each line has the format: `` +/// +/// From [5.1.1. Binary Payloads](https://diffx.org/spec/binary-diffs.html#binary-payloads): +/// +/// > Each line represents up to 52 bytes of pre-encoded data. +/// > There may be an unlimited number of lines. +/// > They contain the following fields: +/// > +/// > * `len_c` is a line length character. +/// > This encodes the length of the (pre-encoded) data written on this line. +/// > * `data` is Base85-encoded data for this line. +#[cfg(feature = "binary")] +fn decode_base85_lines(data: &str) -> Result, BinaryPatchParseError> { + // A rough estimate: In Base85, 5 chars -> 4 bytes + let mut result = Vec::with_capacity(data.len() * 4 / 5); + + for line in data.lines() { + if line.is_empty() { + continue; + } + + let line_bytes = line.as_bytes(); + + let length = decode_line_length(line_bytes[0]) + .ok_or(BinaryPatchParseErrorKind::InvalidLineLengthIndicator)?; + let encoded = &line[1..]; + let start = result.len(); + base85::decode_into(encoded, &mut result)?; + result.truncate(start + length); + } + + Ok(result) +} + +/// Decodes a line length character to its numeric value. +/// +/// From [Line Length Characters](https://diffx.org/spec/binary-diffs.html#line-length-characters): +/// +/// > Each encoded line in a binary diff payload is prefixed by a line length character. +/// > This encodes the length of the compressed (but not encoded) data for the line. +/// > +/// > Line length characters always represent a value between 1 and 52: +/// > +/// > * A value of A-Z represents a number between 1..26. +/// > * A value of a-z represents a number between 27..52. +#[cfg(feature = "binary")] +fn decode_line_length(c: u8) -> Option { + match c { + b'A'..=b'Z' => Some((c - b'A' + 1) as usize), + b'a'..=b'z' => Some((c - b'a' + 27) as usize), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_literal_format_simple() { + let input = "GIT binary patch\nliteral 10\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\n\nliteral 0\nKcmV+b0RR6000031\n\n"; + let (patch, consumed) = parse_binary_patch(input).unwrap(); + + assert_eq!(consumed, input.len()); + match &patch { + BinaryPatch::Full { forward, reverse } => { + assert_eq!(forward.kind, BinaryBlockKind::Literal); + assert_eq!(forward.data.size, 10); + assert_eq!(reverse.kind, BinaryBlockKind::Literal); + assert_eq!(reverse.data.size, 0); + } + _ => panic!("expected Full variant"), + } + } + + #[test] + fn parse_delta_format() { + let input = "GIT binary patch\ndelta 18\nccmV+t0PX*P2!IH%^Z^9`00000v-trB0x!=5aR2}S\n\ndelta 18\nccmV+t0PX*P2!IH%^Z^BFm9#}av-trB0zxAOrvLx|\n\n"; + let (patch, _) = parse_binary_patch(input).unwrap(); + + match &patch { + BinaryPatch::Full { forward, reverse } => { + assert_eq!(forward.kind, BinaryBlockKind::Delta); + assert_eq!(forward.data.size, 18); + assert_eq!(reverse.kind, BinaryBlockKind::Delta); + assert_eq!(reverse.data.size, 18); + } + _ => panic!("expected Full variant"), + } + } + + #[test] + fn parse_invalid_header() { + // Without "GIT binary patch" marker, parse_binary_patch returns error + let input = "literal 10\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\n\n"; + let err = parse_binary_patch(input).unwrap_err(); + assert_eq!(err.kind, BinaryPatchParseErrorKind::InvalidHeader); + } + + #[test] + fn parse_with_crlf_line_endings() { + let input = "GIT binary patch\r\nliteral 10\r\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\r\n\r\nliteral 0\r\nKcmV+b0RR6000031\r\n\r\n"; + let (patch, consumed) = parse_binary_patch(input).unwrap(); + + assert_eq!(consumed, input.len()); + match &patch { + BinaryPatch::Full { forward, reverse } => { + assert_eq!(forward.kind, BinaryBlockKind::Literal); + assert_eq!(forward.data.size, 10); + assert_eq!(reverse.kind, BinaryBlockKind::Literal); + assert_eq!(reverse.data.size, 0); + } + _ => panic!("expected Full variant"), + } + } + + #[test] + fn parse_mixed_format() { + // Git can use different encoding for each direction + let input = "GIT binary patch\nliteral 10\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\n\ndelta 18\nccmV+t0PX*P2!IH%^Z^9`00000v-trB0x!=5aR2}S\n\n"; + let (patch, _) = parse_binary_patch(input).unwrap(); + + match &patch { + BinaryPatch::Full { forward, reverse } => { + assert_eq!(forward.kind, BinaryBlockKind::Literal); + assert_eq!(reverse.kind, BinaryBlockKind::Delta); + } + _ => panic!("expected Full variant"), + } + } +} + +#[cfg(test)] +#[cfg(feature = "binary")] +mod apply_tests { + use super::*; + + #[test] + fn decode_line_length_uppercase() { + assert_eq!(decode_line_length(b'A'), Some(1)); + assert_eq!(decode_line_length(b'B'), Some(2)); + assert_eq!(decode_line_length(b'Z'), Some(26)); + } + + #[test] + fn decode_line_length_lowercase() { + assert_eq!(decode_line_length(b'a'), Some(27)); + assert_eq!(decode_line_length(b'b'), Some(28)); + assert_eq!(decode_line_length(b'z'), Some(52)); + } + + #[test] + fn decode_line_length_invalid() { + assert_eq!(decode_line_length(b'0'), None); + assert_eq!(decode_line_length(b'!'), None); + assert_eq!(decode_line_length(b' '), None); + } + + #[test] + fn apply_literal_patch() { + let input = "GIT binary patch\nliteral 10\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\n\nliteral 0\nKcmV+b0RR6000031\n\n"; + let (patch, _) = parse_binary_patch(input).unwrap(); + + let modified = patch.apply(&[]).unwrap(); + assert_eq!(modified.len(), 10); + assert_eq!(modified, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + + let original = patch.apply_reverse(&[]).unwrap(); + assert_eq!(original.len(), 0); + } + + #[test] + fn apply_with_crlf_line_endings() { + let input = "GIT binary patch\r\nliteral 10\r\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\r\n\r\nliteral 0\r\nKcmV+b0RR6000031\r\n\r\n"; + let (patch, _) = parse_binary_patch(input).unwrap(); + + let modified = patch.apply(&[]).unwrap(); + assert_eq!(modified, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + let original = patch.apply_reverse(&[]).unwrap(); + assert_eq!(original.len(), 0); + } +} diff --git a/src/diff/myers.rs b/src/diff/myers.rs index 73eb4106..502d0772 100644 --- a/src/diff/myers.rs +++ b/src/diff/myers.rs @@ -69,7 +69,7 @@ impl ::std::fmt::Display for Snake { fn max_d(len1: usize, len2: usize) -> usize { // XXX look into reducing the need to have the additional '+ 1' - (len1 + len2 + 1) / 2 + 1 + (len1 + len2).div_ceil(2) + 1 } // The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes some of which may diff --git a/src/lib.rs b/src/lib.rs index 33ebbb64..5e11a40d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -223,6 +223,7 @@ //! [`create_patch_bytes`]: fn.create_patch_bytes.html mod apply; +pub mod binary; mod diff; mod merge; mod patch; diff --git a/src/patch/parse.rs b/src/patch/parse.rs index dd74b15c..2ccb409a 100644 --- a/src/patch/parse.rs +++ b/src/patch/parse.rs @@ -12,6 +12,42 @@ use std::borrow::Cow; type Result = std::result::Result; +/// Options that control parsing behavior. +/// +/// Defaults match the [`parse`]/[`parse_bytes`] behavior. +#[derive(Clone, Copy)] +pub(crate) struct ParseOpts { + skip_preamble: bool, + reject_orphaned_hunks: bool, +} + +impl Default for ParseOpts { + fn default() -> Self { + Self { + skip_preamble: true, + reject_orphaned_hunks: false, + } + } +} + +impl ParseOpts { + /// Don't skip preamble lines before `---`/`+++`/`@@`. + /// + /// Useful when the caller has already positioned the input + /// at the start of the patch content. + pub(crate) fn no_skip_preamble(mut self) -> Self { + self.skip_preamble = false; + self + } + + /// Reject orphaned `@@ ` hunk headers after parsed hunks, + /// matching `git apply` behavior. + pub(crate) fn reject_orphaned_hunks(mut self) -> Self { + self.reject_orphaned_hunks = true; + self + } +} + struct Parser<'a, T: Text + ?Sized> { lines: std::iter::Peekable>, offset: usize, @@ -54,7 +90,22 @@ impl<'a, T: Text + ?Sized> Parser<'a, T> { } pub fn parse(input: &str) -> Result> { - let (result, _consumed) = parse_one(input); + let (result, _consumed) = parse_one(input, ParseOpts::default()); + result +} + +pub fn parse_strict(input: &str) -> Result> { + let (result, _consumed) = parse_one(input, ParseOpts::default().reject_orphaned_hunks()); + result +} + +pub fn parse_bytes(input: &[u8]) -> Result> { + let (result, _consumed) = parse_one(input, ParseOpts::default()); + result +} + +pub fn parse_bytes_strict(input: &[u8]) -> Result> { + let (result, _consumed) = parse_one(input, ParseOpts::default().reject_orphaned_hunks()); result } @@ -62,10 +113,13 @@ pub fn parse(input: &str) -> Result> { /// /// Always returns consumed bytes alongside the result /// so callers can advance past the parsed or partially parsed content. -pub(crate) fn parse_one(input: &str) -> (Result>, usize) { +pub(crate) fn parse_one<'a, T: Text + ?Sized>( + input: &'a T, + opts: ParseOpts, +) -> (Result>, usize) { let mut parser = Parser::new(input); - let header = match patch_header(&mut parser) { + let header = match patch_header(&mut parser, &opts) { Ok(h) => h, Err(e) => return (Err(e), parser.offset()), }; @@ -73,58 +127,28 @@ pub(crate) fn parse_one(input: &str) -> (Result>, usize) { Ok(h) => h, Err(e) => return (Err(e), parser.offset()), }; + if opts.reject_orphaned_hunks { + if let Err(e) = reject_orphaned_hunk_headers(&mut parser) { + return (Err(e), parser.offset()); + } + } let patch = Patch::new( - header.0.map(convert_cow_to_str), - header.1.map(convert_cow_to_str), + header.0.map(T::from_bytes_cow), + header.1.map(T::from_bytes_cow), hunks, ); (Ok(patch), parser.offset()) } -pub fn parse_strict(input: &str) -> Result> { - let mut parser = Parser::new(input); - let header = patch_header(&mut parser)?; - let hunks = hunks(&mut parser)?; - reject_orphaned_hunk_headers(&mut parser)?; - - Ok(Patch::new( - header.0.map(convert_cow_to_str), - header.1.map(convert_cow_to_str), - hunks, - )) -} - -pub fn parse_bytes(input: &[u8]) -> Result> { - let mut parser = Parser::new(input); - let header = patch_header(&mut parser)?; - let hunks = hunks(&mut parser)?; - - Ok(Patch::new(header.0, header.1, hunks)) -} - -pub fn parse_bytes_strict(input: &[u8]) -> Result> { - let mut parser = Parser::new(input); - let header = patch_header(&mut parser)?; - let hunks = hunks(&mut parser)?; - reject_orphaned_hunk_headers(&mut parser)?; - - Ok(Patch::new(header.0, header.1, hunks)) -} - -// This is only used when the type originated as a utf8 string -fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> { - match cow { - Cow::Borrowed(b) => std::str::from_utf8(b).unwrap().into(), - Cow::Owned(o) => String::from_utf8(o).unwrap().into(), - } -} - #[allow(clippy::type_complexity)] -fn patch_header<'a, T: Text + ToOwned + ?Sized>( +fn patch_header<'a, T: Text + ?Sized>( parser: &mut Parser<'a, T>, + opts: &ParseOpts, ) -> Result<(Option>, Option>)> { - skip_header_preamble(parser)?; + if opts.skip_preamble { + skip_header_preamble(parser)?; + } let mut filename1 = None; let mut filename2 = None; @@ -161,10 +185,7 @@ fn skip_header_preamble(parser: &mut Parser<'_, T>) -> Result< Ok(()) } -fn parse_filename<'a, T: Text + ToOwned + ?Sized>( - prefix: &str, - line: &'a T, -) -> Result> { +fn parse_filename<'a, T: Text + ?Sized>(prefix: &str, line: &'a T) -> Result> { let line = line .strip_prefix(prefix) .ok_or(ParsePatchErrorKind::InvalidFilename)?; diff --git a/src/patch_set/error.rs b/src/patch_set/error.rs index ba60e861..77ccd0c1 100644 --- a/src/patch_set/error.rs +++ b/src/patch_set/error.rs @@ -3,6 +3,7 @@ use std::fmt; use std::ops::Range; +use crate::binary::BinaryPatchParseError; use crate::patch::ParsePatchError; /// An error returned when parsing patches fails. @@ -70,6 +71,21 @@ pub(crate) enum PatchSetParseErrorKind { /// Create patch missing modified path. CreateMissingModifiedPath, + + /// Invalid file mode string. + InvalidFileMode(String), + + /// Invalid `diff --git` path. + InvalidDiffGitPath, + + /// File path contains invalid UTF-8. + InvalidUtf8Path, + + /// Binary diff not supported in current configuration. + BinaryNotSupported { path: String }, + + /// Binary patch parsing failed. + BinaryParse(BinaryPatchParseError), } impl fmt::Display for PatchSetParseErrorKind { @@ -81,6 +97,13 @@ impl fmt::Display for PatchSetParseErrorKind { Self::BothDevNull => write!(f, "patch has both original and modified as /dev/null"), Self::DeleteMissingOriginalPath => write!(f, "delete patch has no original path"), Self::CreateMissingModifiedPath => write!(f, "create patch has no modified path"), + Self::InvalidFileMode(mode) => write!(f, "invalid file mode: {mode}"), + Self::InvalidDiffGitPath => write!(f, "invalid diff --git path"), + Self::InvalidUtf8Path => write!(f, "file path is not valid UTF-8"), + Self::BinaryNotSupported { path } => { + write!(f, "binary diff not supported: {path}") + } + Self::BinaryParse(e) => write!(f, "{e}"), } } } @@ -90,3 +113,9 @@ impl From for PatchSetParseError { PatchSetParseErrorKind::Patch(e).into() } } + +impl From for PatchSetParseError { + fn from(e: BinaryPatchParseError) -> Self { + PatchSetParseErrorKind::BinaryParse(e).into() + } +} diff --git a/src/patch_set/mod.rs b/src/patch_set/mod.rs index 7e15a6e3..6141c54a 100644 --- a/src/patch_set/mod.rs +++ b/src/patch_set/mod.rs @@ -10,14 +10,29 @@ mod tests; use std::borrow::Cow; +use crate::binary::BinaryPatch; use crate::Patch; pub use error::PatchSetParseError; +use error::PatchSetParseErrorKind; pub use parse::PatchSet; /// Options for parsing patch content. /// -/// Use [`ParseOptions::unidiff()`] to create options for the desired format. +/// Use [`ParseOptions::unidiff()`] or [`ParseOptions::gitdiff()`] +/// to create options for the desired format. +/// +/// ## Binary Files +/// +/// When parsing git diffs, binary file changes are detected by: +/// +/// * `Binary files a/path and b/path differ` (`git diff` without `--binary` flag) +/// * `GIT binary patch` (from `git diff --binary`) +/// +/// Note that this is not a documented Git behavior, +/// so the implementation here is subject to change if Git changes. +/// +/// By default, binary diffs are kept in the output. /// /// ## Example /// @@ -40,12 +55,28 @@ pub use parse::PatchSet; #[derive(Debug, Clone)] pub struct ParseOptions { pub(crate) format: Format, + pub(crate) binary: Binary, } -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Default)] pub(crate) enum Format { /// Standard unified diff format. + #[default] UniDiff, + /// Git extended diff format. + GitDiff, +} + +/// How to handle binary diffs in GitDiff mode. +#[derive(Debug, Clone, Copy, Default)] +pub(crate) enum Binary { + /// Skip binary diffs silently. + Skip, + /// Return error if binary diff encountered. + Fail, + /// Keep binary diffs in the output. + #[default] + Keep, } impl ParseOptions { @@ -64,8 +95,38 @@ impl ParseOptions { pub fn unidiff() -> Self { Self { format: Format::UniDiff, + binary: Default::default(), } } + + /// Parse as [git extended diff format][git-diff-format]. + /// + /// Supports all features of [`unidiff()`](Self::unidiff) plus: + /// + /// * `diff --git` headers + /// * Extended headers (`new file mode`, `deleted file mode`, etc.) + /// * Rename/copy detection (`rename from`/`rename to`, `copy from`/`copy to`) + /// * Binary file detection (kept by default) + /// + /// [git-diff-format]: https://git-scm.com/docs/diff-format + pub fn gitdiff() -> Self { + Self { + format: Format::GitDiff, + binary: Default::default(), + } + } + + /// Skip binary diffs silently. + pub fn skip_binary(mut self) -> Self { + self.binary = Binary::Skip; + self + } + + /// Return an error if a binary diff is encountered. + pub fn fail_on_binary(mut self) -> Self { + self.binary = Binary::Fail; + self + } } /// File mode extracted from git extended headers. @@ -81,11 +142,27 @@ pub enum FileMode { Gitlink, } +impl std::str::FromStr for FileMode { + type Err = PatchSetParseError; + + fn from_str(mode: &str) -> Result { + match mode { + "100644" => Ok(Self::Regular), + "100755" => Ok(Self::Executable), + "120000" => Ok(Self::Symlink), + "160000" => Ok(Self::Gitlink), + _ => Err(PatchSetParseErrorKind::InvalidFileMode(mode.to_owned()).into()), + } + } +} + /// The kind of patch content in a [`FilePatch`]. #[derive(Clone, PartialEq, Eq)] pub enum PatchKind<'a, T: ToOwned + ?Sized> { /// Text patch with hunks. Text(Patch<'a, T>), + /// Binary patch (literal or delta encoded, or marker-only). + Binary(BinaryPatch<'a>), } impl std::fmt::Debug for PatchKind<'_, T> @@ -96,6 +173,7 @@ where fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { PatchKind::Text(patch) => f.debug_tuple("Text").field(patch).finish(), + PatchKind::Binary(patch) => f.debug_tuple("Binary").field(patch).finish(), } } } @@ -105,6 +183,15 @@ impl<'a, T: ToOwned + ?Sized> PatchKind<'a, T> { pub fn as_text(&self) -> Option<&Patch<'a, T>> { match self { PatchKind::Text(patch) => Some(patch), + PatchKind::Binary(_) => None, + } + } + + /// Returns the binary patch, or `None` if this is a text patch. + pub fn as_binary(&self) -> Option<&BinaryPatch<'a>> { + match self { + PatchKind::Binary(patch) => Some(patch), + PatchKind::Text(_) => None, } } } @@ -152,6 +239,20 @@ impl<'a, T: ToOwned + ?Sized> FilePatch<'a, T> { } } + fn new_binary( + operation: FileOperation<'a>, + patch: BinaryPatch<'a>, + old_mode: Option, + new_mode: Option, + ) -> Self { + Self { + operation, + kind: PatchKind::Binary(patch), + old_mode, + new_mode, + } + } + /// Returns the file operation for this patch. pub fn operation(&self) -> &FileOperation<'a> { &self.operation diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs index c4864f95..08502d46 100644 --- a/src/patch_set/parse.rs +++ b/src/patch_set/parse.rs @@ -1,10 +1,13 @@ //! Parse multiple file patches from a unified diff. use super::{ - error::PatchSetParseErrorKind, FileOperation, FilePatch, Format, ParseOptions, - PatchSetParseError, + error::PatchSetParseErrorKind, Binary, FileMode, FileOperation, FilePatch, Format, + ParseOptions, PatchSetParseError, }; +use crate::binary::{parse_binary_patch, BinaryPatch}; use crate::patch::parse::parse_one; +use crate::utils::{escaped_filename, Text}; +use crate::Patch; use std::borrow::Cow; @@ -45,15 +48,15 @@ const EMAIL_PREAMBLE_SEPARATOR: &str = "\n---\n"; /// println!("{:?}", patch.operation()); /// } /// ``` -pub struct PatchSet<'a> { - input: &'a str, +pub struct PatchSet<'a, T: ?Sized = str> { + input: &'a T, offset: usize, opts: ParseOptions, finished: bool, found_any: bool, } -impl<'a> PatchSet<'a> { +impl<'a> PatchSet<'a, str> { /// Creates a streaming parser for multiple file patches. pub fn parse(input: &'a str, opts: ParseOptions) -> Self { // Strip email preamble once at construction @@ -66,87 +69,250 @@ impl<'a> PatchSet<'a> { found_any: false, } } +} + +impl<'a> Iterator for PatchSet<'a, str> { + type Item = Result, PatchSetParseError>; - /// Creates an error with the current offset as span. - fn error(&self, kind: PatchSetParseErrorKind) -> PatchSetParseError { - PatchSetParseError::new(kind, self.offset..self.offset) + fn next(&mut self) -> Option { + next_patch(self) } +} - fn next_unidiff_patch(&mut self) -> Option, PatchSetParseError>> { - let remaining = &self.input[self.offset..]; - if remaining.is_empty() { - return None; +impl<'a> PatchSet<'a, [u8]> { + /// Creates a streaming parser for multiple file patches from raw bytes. + /// + /// Unlike [`PatchSet::parse`], this preserves non-UTF-8 content in hunks + /// (e.g., binary files that git misdetects as text). + pub fn parse_bytes(input: &'a [u8], opts: ParseOptions) -> Self { + let input = strip_email_preamble(input); + Self { + input, + offset: 0, + opts, + finished: false, + found_any: false, } + } +} - let patch_start = find_patch_start(remaining)?; - self.found_any = true; +impl<'a> Iterator for PatchSet<'a, [u8]> { + type Item = Result, PatchSetParseError>; - let patch_input = &remaining[patch_start..]; + fn next(&mut self) -> Option { + next_patch(self) + } +} - let (result, consumed) = parse_one(patch_input); - // Always advance so the iterator makes progress even on error. - let abs_patch_start = self.offset + patch_start; - self.offset += patch_start + consumed; +fn next_patch<'a, T: Text + ?Sized>( + ps: &mut PatchSet<'a, T>, +) -> Option, PatchSetParseError>> { + if ps.finished { + return None; + } - let patch = match result { - Ok(patch) => patch, - Err(e) => return Some(Err(e.into())), - }; - let operation = match extract_file_op_unidiff(patch.original_path(), patch.modified_path()) - { - Ok(op) => op, - Err(mut e) => { - e.set_span(abs_patch_start..abs_patch_start); - return Some(Err(e)); - } - }; + let result = match ps.opts.format { + Format::UniDiff => next_unidiff_patch(ps), + Format::GitDiff => next_gitdiff_patch(ps), + }; - Some(Ok(FilePatch::new(operation, patch, None, None))) + if result.is_none() { + ps.finished = true; + if !ps.found_any { + let err = PatchSetParseError::new( + PatchSetParseErrorKind::NoPatchesFound, + ps.offset..ps.offset, + ); + return Some(Err(err)); + } } + + result } -impl<'a> Iterator for PatchSet<'a> { - type Item = Result, PatchSetParseError>; +fn next_gitdiff_patch<'a, T: Text + ?Sized>( + ps: &mut PatchSet<'a, T>, +) -> Option, PatchSetParseError>> { + let patch_start = find_gitdiff_start(remaining(ps))?; + ps.offset += patch_start; + ps.found_any = true; - fn next(&mut self) -> Option { - if self.finished { - return None; + let abs_patch_start = ps.offset; + + // Parse extended headers incrementally — stops at first unrecognized line + let (header, header_consumed) = GitHeader::parse(remaining(ps)); + ps.offset += header_consumed; + + // Handle "Binary files ... differ" (no patch data) + if header.is_binary_marker { + match ps.opts.binary { + Binary::Skip => { + return next_gitdiff_patch(ps); + } + Binary::Fail => { + let path = header.diff_git_line.unwrap_or("").to_owned(); + return Some(Err(PatchSetParseError::new( + PatchSetParseErrorKind::BinaryNotSupported { path }, + abs_patch_start..abs_patch_start, + ))); + } + Binary::Keep => { + // FIXME: error spans point at `diff --git` line, not the specific offending line + let operation = match extract_file_op_binary(&header) { + Ok(op) => op, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + let (old_mode, new_mode) = match parse_file_modes(&header) { + Ok(modes) => modes, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + return Some(Ok(FilePatch::new_binary( + operation, + BinaryPatch::Marker, + old_mode, + new_mode, + ))); + } } + } - let result = match self.opts.format { - Format::UniDiff => { - let result = self.next_unidiff_patch(); - if result.is_none() { - self.finished = true; - if !self.found_any { - return Some(Err(self.error(PatchSetParseErrorKind::NoPatchesFound))); + // Handle "GIT binary patch" (has patch data) + if let Some(binary_patch_start) = header.binary_patch_offset { + match ps.opts.binary { + Binary::Skip => { + return next_gitdiff_patch(ps); + } + Binary::Fail => { + let path = header.diff_git_line.unwrap_or("").to_owned(); + return Some(Err(PatchSetParseError::new( + PatchSetParseErrorKind::BinaryNotSupported { path }, + abs_patch_start..abs_patch_start, + ))); + } + Binary::Keep => { + // GitHeader::parse consumed the marker line but not the payload. + // Use the recorded offset to pass input from the marker onward. + let (_, binary_input) = ps.input.split_at(abs_patch_start + binary_patch_start); + // Binary patch data is always ASCII (base85-encoded). + let binary_input = binary_input.as_str_prefix(); + let (binary_patch, consumed) = match parse_binary_patch(binary_input) { + Ok(result) => result, + Err(e) => return Some(Err(e.into())), + }; + ps.offset = abs_patch_start + binary_patch_start + consumed; + + // FIXME: error spans point at `diff --git` line, not the specific offending line + let operation = match extract_file_op_binary(&header) { + Ok(op) => op, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); } - } - result + }; + let (old_mode, new_mode) = match parse_file_modes(&header) { + Ok(modes) => modes, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + return Some(Ok(FilePatch::new_binary( + operation, + binary_patch, + old_mode, + new_mode, + ))); } - }; + } + } + + // `git diff` output format is stricter. + // There is no preamble between Git headers and unidiff patch portion, + // so we safely don't perform the preamble skipping. + // + // If we did, it would fail the pure rename/mode-change operation + // since those ops have no unidiff patch portion + // and is directly followed by the next `diff --git` header. + let opts = crate::patch::parse::ParseOpts::default().no_skip_preamble(); + let (result, consumed) = parse_one(remaining(ps), opts); + ps.offset += consumed; + let patch = match result { + Ok(patch) => patch, + Err(e) => return Some(Err(e.into())), + }; + + // FIXME: error spans point at `diff --git` line, not the specific offending line + let operation = match extract_file_op_gitdiff(&header, &patch) { + Ok(op) => op, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; - result + // FIXME: error spans point at `diff --git` line, not the specific offending line + let (old_mode, new_mode) = match parse_file_modes(&header) { + Ok(modes) => modes, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + + Some(Ok(FilePatch::new(operation, patch, old_mode, new_mode))) +} + +fn next_unidiff_patch<'a, T: Text + ?Sized>( + ps: &mut PatchSet<'a, T>, +) -> Option, PatchSetParseError>> { + let remaining = remaining(ps); + if remaining.is_empty() { + return None; } + + let patch_start = find_patch_start(remaining)?; + ps.found_any = true; + + let (_, patch_input) = remaining.split_at(patch_start); + + let opts = crate::patch::parse::ParseOpts::default(); + let (result, consumed) = parse_one(patch_input, opts); + // Always advance so the iterator makes progress even on error. + let abs_patch_start = ps.offset + patch_start; + ps.offset += patch_start + consumed; + + let patch = match result { + Ok(patch) => patch, + Err(e) => return Some(Err(e.into())), + }; + let operation = match extract_file_op_unidiff(patch.original_path(), patch.modified_path()) { + Ok(op) => op, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + + Some(Ok(FilePatch::new(operation, patch, None, None))) } /// Finds the byte offset of the first patch header in the input. /// /// A patch header starts with `--- ` or `+++ ` (the file path lines). /// Returns `None` if no header is found. -fn find_patch_start(input: &str) -> Option { +fn find_patch_start(input: &T) -> Option { let mut offset = 0; for line in input.lines() { if line.starts_with(ORIGINAL_PREFIX) || line.starts_with(MODIFIED_PREFIX) { return Some(offset); } offset += line.len(); - // Account for the line ending that `.lines()` strips - if input[offset..].starts_with("\r\n") { - offset += 2; - } else if input[offset..].starts_with('\n') { - offset += 1; - } } None } @@ -166,25 +332,435 @@ fn find_patch_start(input: &str) -> Option { /// > The log message and the patch are separated by a line with a three-dash line. /// /// [`git format-patch`]: https://git-scm.com/docs/git-format-patch -fn strip_email_preamble(input: &str) -> &str { +fn strip_email_preamble(input: &T) -> &T { // only strip preamble for mbox-formatted input if !input.starts_with("From ") { return input; } match input.find(EMAIL_PREAMBLE_SEPARATOR) { - Some(pos) => &input[pos + EMAIL_PREAMBLE_SEPARATOR.len()..], + Some(pos) => { + let (_, rest) = input.split_at(pos + EMAIL_PREAMBLE_SEPARATOR.len()); + rest + } None => input, } } +/// Finds the byte offset of the first `diff --git` line in `input`. +fn find_gitdiff_start(input: &T) -> Option { + let mut offset = 0; + for line in input.lines() { + if line.starts_with("diff --git ") { + return Some(offset); + } + offset += line.len(); + } + None +} + +/// Git extended header metadata. +/// +/// Extracted from lines between `diff --git` and `---` (or end of patch). +/// See [git-diff format documentation](https://git-scm.com/docs/diff-format). +#[derive(Debug, Default)] +struct GitHeader<'a> { + /// Raw content after "diff --git " prefix. + /// + /// Only parsed in fallback when `---`/`+++` is absent (mode-only, binary, empty file). + diff_git_line: Option<&'a str>, + /// Source path from `rename from `. + rename_from: Option<&'a str>, + /// Destination path from `rename to `. + rename_to: Option<&'a str>, + /// Source path from `copy from `. + copy_from: Option<&'a str>, + /// Destination path from `copy to `. + copy_to: Option<&'a str>, + /// File mode from `old mode `. + old_mode: Option<&'a str>, + /// File mode from `new mode `. + new_mode: Option<&'a str>, + /// File mode from `new file mode `. + new_file_mode: Option<&'a str>, + /// File mode from `deleted file mode `. + deleted_file_mode: Option<&'a str>, + /// Whether this is a binary diff with no actual patch content. + /// + /// Observed `git diff` output (without `--binary`): + /// + /// ```text + /// diff --git a/image.png b/image.png + /// new file mode 100644 + /// index 0000000..7c4530c + /// Binary files /dev/null and b/image.png differ + /// ``` + is_binary_marker: bool, + /// Byte offset of `"GIT binary patch"` line relative to header input, + /// or `None` if no binary patch content was found. + /// + /// Observed `git diff --binary` output: + /// + /// ```text + /// diff --git a/image.png b/image.png + /// new file mode 100644 + /// index 0000000..7c4530c + /// GIT binary patch + /// literal 67 + /// zcmV-J0KET+... + /// + /// literal 0 + /// KcmV+b0RR6000031 + /// ``` + binary_patch_offset: Option, +} + +impl<'a> GitHeader<'a> { + /// Parses git extended headers incrementally from the current position. + /// + /// Consumes the `diff --git` line and all recognized extended header lines, + /// stopping at the first unrecognized line (typically `---`/`+++`/`@@` + /// or the next `diff --git`). + /// + /// Returns the parsed header and the number of bytes consumed. + /// + /// Assumption: Header lines are always ASCII + /// (with `core.quotePath=true`, git's default). + fn parse(input: &'a T) -> (Self, usize) { + let mut header = GitHeader::default(); + let mut consumed = 0; + + for line in input.lines() { + let trimmed = strip_line_ending(line); + let Some(s) = trimmed.as_str() else { + // Non-UTF-8 line — not a recognized header + break; + }; + + if let Some(rest) = s.strip_prefix("diff --git ") { + // Only accept the first `diff --git` line. + // A second one means we've reached the next patch. + if header.diff_git_line.is_some() { + break; + } + header.diff_git_line = Some(rest); + } else if let Some(path) = s.strip_prefix("rename from ") { + header.rename_from = Some(path); + } else if let Some(path) = s.strip_prefix("rename to ") { + header.rename_to = Some(path); + } else if let Some(path) = s.strip_prefix("copy from ") { + header.copy_from = Some(path); + } else if let Some(path) = s.strip_prefix("copy to ") { + header.copy_to = Some(path); + } else if let Some(mode) = s.strip_prefix("old mode ") { + header.old_mode = Some(mode); + } else if let Some(mode) = s.strip_prefix("new mode ") { + header.new_mode = Some(mode); + } else if let Some(mode) = s.strip_prefix("new file mode ") { + header.new_file_mode = Some(mode); + } else if let Some(mode) = s.strip_prefix("deleted file mode ") { + header.deleted_file_mode = Some(mode); + } else if s.starts_with("index ") + || s.starts_with("similarity index ") + || s.starts_with("dissimilarity index ") + { + // Recognized but nothing to extract. + } else if s.starts_with("Binary files ") { + header.is_binary_marker = true; + } else if s.starts_with("GIT binary patch") { + header.binary_patch_offset = Some(consumed); + } else { + // Unrecognized line: End of extended headers + // (typically `---`/`+++`/`@@` or trailing content). + break; + } + + consumed += line.len(); + } + + (header, consumed) + } +} + +/// Determines the file operation from git headers and patch paths. +fn extract_file_op_gitdiff<'a, T: Text + ?Sized>( + header: &GitHeader<'a>, + patch: &Patch<'a, T>, +) -> Result, PatchSetParseError> { + // Git headers are authoritative for rename/copy + if let (Some(from), Some(to)) = (header.rename_from, header.rename_to) { + return Ok(FileOperation::Rename { + from: Cow::Borrowed(from), + to: Cow::Borrowed(to), + }); + } + if let (Some(from), Some(to)) = (header.copy_from, header.copy_to) { + return Ok(FileOperation::Copy { + from: Cow::Borrowed(from), + to: Cow::Borrowed(to), + }); + } + + // Try ---/+++ paths first + if patch.original().is_some() || patch.modified().is_some() { + return extract_file_op_unidiff(patch.original_path(), patch.modified_path()); + } + + // Fall back to `diff --git ` for mode-only and empty file changes + let Some((original, modified)) = header.diff_git_line.and_then(parse_diff_git_path) else { + return Err(PatchSetParseErrorKind::InvalidDiffGitPath.into()); + }; + + if header.new_file_mode.is_some() { + Ok(FileOperation::Create(modified)) + } else if header.deleted_file_mode.is_some() { + Ok(FileOperation::Delete(original)) + } else { + Ok(FileOperation::Modify { original, modified }) + } +} + +/// Extracts file operation for binary patches (no ---/+++ headers). +fn extract_file_op_binary<'a>( + header: &GitHeader<'a>, +) -> Result, PatchSetParseError> { + // Git headers are authoritative for rename/copy + if let (Some(from), Some(to)) = (header.rename_from, header.rename_to) { + return Ok(FileOperation::Rename { + from: Cow::Borrowed(from), + to: Cow::Borrowed(to), + }); + } + if let (Some(from), Some(to)) = (header.copy_from, header.copy_to) { + return Ok(FileOperation::Copy { + from: Cow::Borrowed(from), + to: Cow::Borrowed(to), + }); + } + + // Use `diff --git ` for binary patches. + let Some((original, modified)) = header.diff_git_line.and_then(parse_diff_git_path) else { + return Err(PatchSetParseErrorKind::InvalidDiffGitPath.into()); + }; + + if header.new_file_mode.is_some() { + Ok(FileOperation::Create(modified)) + } else if header.deleted_file_mode.is_some() { + Ok(FileOperation::Delete(original)) + } else { + Ok(FileOperation::Modify { original, modified }) + } +} + +/// Parses file modes from git extended headers. +fn parse_file_modes( + header: &GitHeader<'_>, +) -> Result<(Option, Option), PatchSetParseError> { + let old_mode = header + .old_mode + .or(header.deleted_file_mode) + .map(str::parse::) + .transpose()?; + let new_mode = header + .new_mode + .or(header.new_file_mode) + .map(str::parse::) + .transpose()?; + Ok((old_mode, new_mode)) +} + +/// Extracts both old and new paths from `diff --git` line content. +/// +/// ## Assumption #1: old and new paths are the same +/// +/// This extraction has one strong assumption: +/// Beside their prefixes, old and new paths are the same. +/// +/// From [git-diff format documentation]: +/// +/// > The `a/` and `b/` filenames are the same unless rename/copy is involved. +/// > Especially, even for a creation or a deletion, `/dev/null` is not used +/// > in place of the `a/` or `b/` filenames. +/// > +/// > When a rename/copy is involved, file1 and file2 show the name of the +/// > source file of the rename/copy and the name of the file that the +/// > rename/copy produces, respectively. +/// +/// Since rename/copy operations use `rename from/to` and `copy from/to` headers +/// we have handled earlier in [`extract_file_op_gitdiff`], +/// (which have no `a/`/`b/` prefix per git spec), +/// +/// this extraction is only used +/// * when unified diff headers (`---`/`+++`) are absent +/// * Only for mode-only and empty file cases +/// +/// [git-diff format documentation]: https://git-scm.com/docs/diff-format +/// +/// ## Assumption #2: the longest common path suffix is the shared path +/// +/// When custom prefixes contain spaces, +/// multiple splits may produce valid path suffixes. +/// +/// Example: `src/foo.rs src/foo.rs src/foo.rs src/foo.rs` +/// +/// Three splits all produce valid path suffixes (contain `/`): +/// +/// * Position 10 +/// * old path: `src/foo.rs` +/// * new path: `src/foo.rs src/foo.rs src/foo.rs` +/// * common suffix: `foo.rs` +/// * Position 21 +/// * old path: `src/foo.rs src/foo.rs` +/// * new path: `src/foo.rs src/foo.rs` +/// * common suffix: `foo.rs src/foo.rs` +/// * Position 32 +/// * old path: `src/foo.rs src/foo.rs src/foo.rs` +/// * new path: `src/foo.rs` +/// * common suffix: `foo.rs` +/// +/// We observed that `git apply` would pick position 21, +/// which has the longest path suffix, +/// hence this heuristic. +/// +/// ## Supported formats +/// +/// * `a/ b/` (default prefix) +/// * ` ` (`git diff --no-prefix`) +/// * ` ` (custom prefix) +/// * `"" ""` (quoted, with escapes) +/// * Mixed quoted/unquoted +fn parse_diff_git_path(line: &str) -> Option<(Cow<'_, str>, Cow<'_, str>)> { + if line.starts_with('"') || line.ends_with('"') { + parse_quoted_diff_git_path(line) + } else { + parse_unquoted_diff_git_path(line) + } +} + +/// See [`parse_diff_git_path`]. +fn parse_unquoted_diff_git_path(line: &str) -> Option<(Cow<'_, str>, Cow<'_, str>)> { + let mut best_match = None; + let mut longest_path = ""; + + for (i, _) in line.match_indices(' ') { + let left = &line[..i]; + let right = &line[i + 1..]; + if left.is_empty() || right.is_empty() { + continue; + } + // Select split with longest common path suffix (matches Git behavior) + if let Some(path) = longest_common_path_suffix(left, right) { + if path.len() > longest_path.len() { + longest_path = path; + best_match = Some((left, right)); + } + } + } + + best_match.map(|(l, r)| (Cow::Borrowed(l), Cow::Borrowed(r))) +} + +/// See [`parse_diff_git_path`]. +fn parse_quoted_diff_git_path(line: &str) -> Option<(Cow<'_, str>, Cow<'_, str>)> { + let (left_raw, right_raw) = if line.starts_with('"') { + // First token is quoted. + let bytes = line.as_bytes(); + let mut i = 1; // skip starting `"` + let end = loop { + match bytes.get(i)? { + b'"' => break i + 1, + b'\\' => i += 2, + _ => i += 1, + } + }; + let (first, rest) = line.split_at(end); + let rest = rest.strip_prefix(' ')?; + (first, rest) + } else if let Some(pos) = line.find(" \"") { + // First token is unquoted. The second must be quoted. + (&line[..pos], &line[pos + 1..]) + } else { + // Malformed: ends with `"` but no valid quoted path found + return None; + }; + + let left = escaped_filename(left_raw).ok().and_then(|cow| match cow { + Cow::Borrowed(b) => std::str::from_utf8(b).ok().map(Cow::Borrowed), + Cow::Owned(v) => String::from_utf8(v).ok().map(Cow::Owned), + })?; + let right = escaped_filename(right_raw).ok().and_then(|cow| match cow { + Cow::Borrowed(b) => std::str::from_utf8(b).ok().map(Cow::Borrowed), + Cow::Owned(v) => String::from_utf8(v).ok().map(Cow::Owned), + })?; + + // Verify both sides share the same path. + longest_common_path_suffix(&left, &right)?; + Some((left, right)) +} + +/// Extracts the longest common path suffix starting at a path component boundary. +/// +/// Returns `None` if no valid common path exists. +/// +/// Path component boundary means: +/// +/// * At `/` character (e.g., `foo/bar.rs` vs `fooo/bar.rs` → `bar.rs`) +/// * Or the entire string is identical +fn longest_common_path_suffix<'a>(a: &'a str, b: &'a str) -> Option<&'a str> { + if a.is_empty() || b.is_empty() { + return None; + } + + let suffix_len = a + .as_bytes() + .iter() + .rev() + .zip(b.as_bytes().iter().rev()) + .take_while(|(x, y)| x == y) + .count(); + + if suffix_len == 0 { + return None; + } + + // Identical strings + if suffix_len == a.len() && a.len() == b.len() { + return Some(a); + } + + // Find first '/' in suffix and return path after it + let suffix_start = a.len() - suffix_len; + let suffix = &a[suffix_start..]; + suffix + .split_once('/') + .map(|(_, path)| path) + .filter(|p| !p.is_empty()) +} + /// Extracts the file operation from a patch based on its header paths. -pub(crate) fn extract_file_op_unidiff<'a>( - original: Option<&Cow<'a, str>>, - modified: Option<&Cow<'a, str>>, +/// +/// Converts paths from `Cow<'a, T>` to `Cow<'a, str>` via [`Text::as_str`], +/// since [`FileOperation`] always uses `Cow` for paths. +fn extract_file_op_unidiff<'a, T: Text + ?Sized>( + original: Option<&Cow<'a, T>>, + modified: Option<&Cow<'a, T>>, ) -> Result, PatchSetParseError> { - let is_create = original.map(Cow::as_ref) == Some(DEV_NULL); - let is_delete = modified.map(Cow::as_ref) == Some(DEV_NULL); + let to_str = |cow: &Cow<'a, T>| -> Option> { + match cow { + Cow::Borrowed(b) => b.as_str().map(Cow::Borrowed), + Cow::Owned(_) => cow.as_ref().as_str().map(|s| Cow::Owned(s.to_owned())), + } + }; + // TODO: support non-UTF8 paths + let original = original + .map(|cow| to_str(cow).ok_or(PatchSetParseErrorKind::InvalidUtf8Path)) + .transpose()?; + let modified = modified + .map(|cow| to_str(cow).ok_or(PatchSetParseErrorKind::InvalidUtf8Path)) + .transpose()?; + + let is_create = original.as_deref() == Some(DEV_NULL); + let is_delete = modified.as_deref() == Some(DEV_NULL); if is_create && is_delete { return Err(PatchSetParseErrorKind::BothDevNull.into()); @@ -192,32 +768,44 @@ pub(crate) fn extract_file_op_unidiff<'a>( if is_delete { let path = original.ok_or(PatchSetParseErrorKind::DeleteMissingOriginalPath)?; - Ok(FileOperation::Delete(path.clone())) + Ok(FileOperation::Delete(path)) } else if is_create { let path = modified.ok_or(PatchSetParseErrorKind::CreateMissingModifiedPath)?; - Ok(FileOperation::Create(path.clone())) + Ok(FileOperation::Create(path)) } else { match (original, modified) { - (Some(original), Some(modified)) => Ok(FileOperation::Modify { - original: original.clone(), - modified: modified.clone(), - }), + (Some(original), Some(modified)) => Ok(FileOperation::Modify { original, modified }), (None, Some(modified)) => { // No original path, but has modified path. // Observed that GNU patch reads from the modified path in this case. Ok(FileOperation::Modify { original: modified.clone(), - modified: modified.clone(), + modified, }) } (Some(original), None) => { // No modified path, but has original path. Ok(FileOperation::Modify { modified: original.clone(), - original: original.clone(), + original, }) } (None, None) => Err(PatchSetParseErrorKind::NoFilePath.into()), } } } + +/// Strips the trailing `\n` from a line yielded by [`Text::lines`]. +/// +/// [`Text::lines`] includes line endings; strip for matching. +fn strip_line_ending(line: &T) -> &T { + // TODO: GNU patch strips trailing CRs from CRLF patches automatically. + // We should consider adding compat tests for GNU patch. + // And `git apply` seems to reject. Worth adding tests as well. + line.strip_suffix("\n").unwrap_or(line) +} + +fn remaining<'a, T: Text + ?Sized>(ps: &PatchSet<'a, T>) -> &'a T { + let (_, rest) = ps.input.split_at(ps.offset); + rest +} diff --git a/src/patch_set/tests.rs b/src/patch_set/tests.rs index 5bbfd316..5a9951ea 100644 --- a/src/patch_set/tests.rs +++ b/src/patch_set/tests.rs @@ -463,3 +463,196 @@ In a hole in the ground there lived a hobbit ); } } + +mod patchset_gitdiff { + use super::*; + use crate::patch_set::PatchKind; + + fn parse_gitdiff(input: &str) -> Vec> { + PatchSet::parse(input, ParseOptions::gitdiff()) + .collect::, _>>() + .unwrap() + } + + /// `parse_one` must stop at `diff --git` boundaries so that + /// back-to-back patches are split correctly. + /// Without this, the second patch's `diff --git` line would be + /// swallowed as trailing junk by the first patch's hunk parser. + #[test] + fn multi_file_stops_at_diff_git_boundary() { + let input = "\ +diff --git a/foo b/foo +--- a/foo ++++ b/foo +@@ -1 +1 @@ +-old foo ++new foo +diff --git a/bar b/bar +--- a/bar ++++ b/bar +@@ -1 +1 @@ +-old bar ++new bar +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 2); + } + + #[test] + fn pure_rename() { + let input = "\ +diff --git a/old.rs b/new.rs +similarity index 100% +rename from old.rs +rename to new.rs +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Rename { + from: "old.rs".into(), + to: "new.rs".into(), + } + ); + } + + /// Empty file creation has no ---/+++ headers, so the path comes + /// from the `diff --git` line and retains the `b/` prefix. + /// Callers use `strip_prefix(1)` to remove it. + #[test] + fn new_empty_file() { + let input = "\ +diff --git a/empty b/empty +new file mode 100644 +index 0000000..e69de29 +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Create("b/empty".into()) + ); + match patches[0].patch() { + PatchKind::Text(p) => assert!(p.hunks().is_empty()), + PatchKind::Binary(_) => panic!("expected text patch"), + } + } + + #[test] + fn rename_then_modify() { + // Rename with no hunks followed by a modify with hunks. + // Tests that offset advances correctly across both. + let input = "\ +diff --git a/old.rs b/new.rs +similarity index 100% +rename from old.rs +rename to new.rs +diff --git a/foo b/foo +--- a/foo ++++ b/foo +@@ -1 +1 @@ +-old ++new +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 2); + assert!(matches!( + patches[0].operation(), + FileOperation::Rename { .. } + )); + assert!(matches!( + patches[1].operation(), + FileOperation::Modify { .. } + )); + } + + #[test] + fn binary_marker_kept_by_default() { + // Default is Keep: binary marker is returned as BinaryPatch::Marker. + let input = "\ +diff --git a/img.png b/img.png +Binary files a/img.png and b/img.png differ +diff --git a/foo b/foo +--- a/foo ++++ b/foo +@@ -1 +1 @@ +-old ++new +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 2); + assert!(patches[0].patch().as_binary().is_some()); + assert!(patches[1].patch().as_text().is_some()); + } + + #[test] + fn binary_skip_does_not_stall() { + // Binary marker must be skipped and offset must advance + // to the next patch without infinite loop. + let input = "\ +diff --git a/img.png b/img.png +Binary files a/img.png and b/img.png differ +diff --git a/foo b/foo +--- a/foo ++++ b/foo +@@ -1 +1 @@ +-old ++new +"; + let patches: Vec<_> = PatchSet::parse(input, ParseOptions::gitdiff().skip_binary()) + .collect::>() + .unwrap(); + assert_eq!(patches.len(), 1, "binary should be skipped, text parsed"); + } +} + +mod patchset_parse_bytes { + use super::*; + + #[test] + fn non_utf8_hunk_content() { + // Hunk lines contain invalid UTF-8 bytes (0x80, 0xff). + let mut input: Vec = Vec::new(); + input.extend(b"--- a/file.bin\n"); + input.extend(b"+++ b/file.bin\n"); + input.extend(b"@@ -1,3 +1,3 @@\n"); + input.extend(b" valid ascii\n"); + input.extend(b"-old \x80\xff bytes\n"); + input.extend(b"+new \x80\xff bytes\n"); + input.extend(b" more ascii\n"); + + let patches = PatchSet::parse_bytes(&input, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + + let patch = patches[0].patch().as_text().expect("expected text patch"); + let hunks = patch.hunks(); + assert_eq!(hunks.len(), 1); + + use crate::patch::Line; + let lines = hunks[0].lines(); + assert_eq!(lines.len(), 4); + assert_eq!(lines[0], Line::Context(b"valid ascii\n" as &[u8])); + assert_eq!(lines[1], Line::Delete(b"old \x80\xff bytes\n" as &[u8])); + assert_eq!(lines[2], Line::Insert(b"new \x80\xff bytes\n" as &[u8])); + assert_eq!(lines[3], Line::Context(b"more ascii\n" as &[u8])); + } + + #[test] + fn non_utf8_path_returns_error() { + // File paths (currently) must be valid UTF-8 even in byte mode. + let mut input: Vec = Vec::new(); + input.extend(b"--- a/bad\xffpath\n"); + input.extend(b"+++ b/good\n"); + input.extend(b"@@ -1 +1 @@\n"); + input.extend(b"-old\n"); + input.extend(b"+new\n"); + + let result: Result, _> = + PatchSet::parse_bytes(&input, ParseOptions::unidiff()).collect(); + let err = result.unwrap_err(); + assert_eq!(err.kind, PatchSetParseErrorKind::InvalidUtf8Path); + } +} diff --git a/src/utils.rs b/src/utils.rs index c3b15944..60470506 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -132,7 +132,7 @@ impl<'a, T: Text + ?Sized> Iterator for LineIter<'a, T> { /// A helper trait for processing text like `str` and `[u8]` /// Useful for abstracting over those types for parsing as well as breaking input into lines -pub trait Text: Eq + Hash { +pub trait Text: Eq + Hash + ToOwned { fn is_empty(&self) -> bool; fn len(&self) -> usize; fn starts_with(&self, prefix: &str) -> bool; @@ -144,10 +144,18 @@ pub trait Text: Eq + Hash { fn find(&self, needle: &str) -> Option; fn split_at(&self, mid: usize) -> (&Self, &Self); fn as_str(&self) -> Option<&str>; + /// Returns the longest valid UTF-8 prefix. + /// + /// For `str` this is the entire input. + /// For `[u8]` this returns up to the first invalid UTF-8 byte. + fn as_str_prefix(&self) -> &str; fn as_bytes(&self) -> &[u8]; #[allow(unused)] fn lines(&self) -> LineIter<'_, Self>; + /// Converts a byte-oriented `Cow` into a `Cow` of `Self`. + fn from_bytes_cow(cow: Cow<'_, [u8]>) -> Cow<'_, Self>; + fn parse(&self) -> Option { self.as_str().and_then(|s| s.parse().ok()) } @@ -195,6 +203,10 @@ impl Text for str { Some(self) } + fn as_str_prefix(&self) -> &str { + self + } + fn as_bytes(&self) -> &[u8] { self.as_bytes() } @@ -202,6 +214,13 @@ impl Text for str { fn lines(&self) -> LineIter<'_, Self> { LineIter::new(self) } + + fn from_bytes_cow(cow: Cow<'_, [u8]>) -> Cow<'_, Self> { + match cow { + Cow::Borrowed(b) => Cow::Borrowed(std::str::from_utf8(b).unwrap()), + Cow::Owned(o) => Cow::Owned(String::from_utf8(o).unwrap()), + } + } } impl Text for [u8] { @@ -245,6 +264,13 @@ impl Text for [u8] { std::str::from_utf8(self).ok() } + fn as_str_prefix(&self) -> &str { + match std::str::from_utf8(self) { + Ok(s) => s, + Err(e) => std::str::from_utf8(&self[..e.valid_up_to()]).unwrap(), + } + } + fn as_bytes(&self) -> &[u8] { self } @@ -252,6 +278,10 @@ impl Text for [u8] { fn lines(&self) -> LineIter<'_, Self> { LineIter::new(self) } + + fn from_bytes_cow(cow: Cow<'_, [u8]>) -> Cow<'_, Self> { + cow + } } fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option { @@ -292,7 +322,7 @@ fn find_byte(haystack: &[u8], byte: u8) -> Option { /// /// See [`byte_needs_quoting`] for the set of characters that /// require quoting. -pub(crate) fn escaped_filename( +pub(crate) fn escaped_filename( filename: &T, ) -> Result, ParsePatchError> { if let Some(inner) = filename @@ -309,9 +339,7 @@ pub(crate) fn escaped_filename( } } -fn decode_escaped( - escaped: &T, -) -> Result, ParsePatchError> { +fn decode_escaped(escaped: &T) -> Result, ParsePatchError> { let bytes = escaped.as_bytes(); let mut result = Vec::new(); let mut i = 0; diff --git a/tests/compat/common.rs b/tests/compat/common.rs index 7bc6f37a..83f27ad9 100644 --- a/tests/compat/common.rs +++ b/tests/compat/common.rs @@ -2,16 +2,30 @@ use std::{ fs, + io::Write, path::{Path, PathBuf}, - process::Command, + process::{Command, Stdio}, sync::Once, }; -use diffy::patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet, PatchSetParseError}; +use diffy::{ + binary::{BinaryPatch, BinaryPatchParseError}, + patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet, PatchSetParseError}, +}; + +/// Which external tool to compare against. +#[derive(Clone, Copy)] +pub enum CompatMode { + /// `git apply` with `ParseOptions::gitdiff()` + Git, + /// GNU `patch` with `ParseOptions::unidiff()` + GnuPatch, +} /// A test case with fluent builder API. pub struct Case<'a> { case_name: &'a str, + mode: CompatMode, /// Strip level for path prefixes (default: 0) strip_level: u32, /// Whether diffy is expected to succeed (default: true) @@ -21,20 +35,37 @@ pub struct Case<'a> { } impl<'a> Case<'a> { + /// Create a test case for `git apply` comparison. + pub fn git(name: &'a str) -> Self { + Self { + case_name: name, + mode: CompatMode::Git, + strip_level: 0, + expect_success: true, + expect_compat: true, + } + } + /// Create a test case for GNU patch comparison. pub fn gnu_patch(name: &'a str) -> Self { Self { case_name: name, + mode: CompatMode::GnuPatch, strip_level: 0, expect_success: true, expect_compat: true, } } - /// Get the case directory path. + /// Get the case directory path based on mode. fn case_dir(&self) -> PathBuf { + let subdir = match self.mode { + CompatMode::Git => "git", + CompatMode::GnuPatch => "gnu_patch", + }; PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("tests/compat/gnu_patch") + .join("tests/compat") + .join(subdir) .join(self.case_name) } @@ -58,16 +89,23 @@ impl<'a> Case<'a> { let case_dir = self.case_dir(); let in_dir = case_dir.join("in"); let patch_path = in_dir.join("foo.patch"); - let patch = fs::read_to_string(&patch_path) + let patch = fs::read(&patch_path) .unwrap_or_else(|e| panic!("failed to read {}: {e}", patch_path.display())); let case_name = self.case_name; + let prefix = match self.mode { + CompatMode::Git => "git", + CompatMode::GnuPatch => "gnu", + }; let temp_base = temp_base(); - let diffy_output = temp_base.join(format!("gnu-{case_name}-diffy")); + let diffy_output = temp_base.join(format!("{prefix}-{case_name}-diffy")); create_output_dir(&diffy_output); - let opts = ParseOptions::unidiff(); + let opts = match self.mode { + CompatMode::Git => ParseOptions::gitdiff(), + CompatMode::GnuPatch => ParseOptions::unidiff(), + }; // Apply with diffy let diffy_result = apply_diffy(&in_dir, &patch, &diffy_output, opts, self.strip_level); @@ -81,12 +119,19 @@ impl<'a> Case<'a> { // In CI mode, also verify external tool behavior if is_ci() { - let external_output = temp_base.join(format!("gnu-{case_name}-external")); + let external_output = temp_base.join(format!("{prefix}-{case_name}-external")); create_output_dir(&external_output); - print_patch_version(); - let external_result = - gnu_patch_apply(&in_dir, &patch_path, &external_output, self.strip_level); + let external_result = match self.mode { + CompatMode::Git => { + print_git_version(); + git_apply(&external_output, &patch, self.strip_level, &in_dir) + } + CompatMode::GnuPatch => { + print_patch_version(); + gnu_patch_apply(&in_dir, &patch_path, &external_output, self.strip_level) + } + }; // For success cases where both succeed and are expected to be compatible, // verify outputs match @@ -120,6 +165,34 @@ impl<'a> Case<'a> { // External tool invocations +fn git_apply( + output_dir: &Path, + patch: &[u8], + strip_level: u32, + in_dir: &Path, +) -> Result<(), String> { + copy_input_files(in_dir, output_dir, &["patch"]); + + let mut cmd = Command::new("git"); + cmd.env("GIT_CONFIG_NOSYSTEM", "1"); + cmd.env("GIT_CONFIG_GLOBAL", "/dev/null"); + cmd.current_dir(output_dir); + cmd.args(["apply", &format!("-p{strip_level}"), "-"]); + cmd.stdin(Stdio::piped()); + cmd.stdout(Stdio::piped()); + cmd.stderr(Stdio::piped()); + + let mut child = cmd.spawn().expect("failed to spawn git apply"); + child.stdin.as_mut().unwrap().write_all(patch).unwrap(); + + let output = child.wait_with_output().unwrap(); + if output.status.success() { + Ok(()) + } else { + Err(String::from_utf8_lossy(&output.stderr).to_string()) + } +} + fn gnu_patch_apply( in_dir: &Path, patch_path: &Path, @@ -149,6 +222,24 @@ fn gnu_patch_apply( } } +fn print_git_version() { + static ONCE: Once = Once::new(); + ONCE.call_once(|| { + let output = Command::new("git").arg("--version").output(); + match output { + Ok(o) if o.status.success() => { + let version = String::from_utf8_lossy(&o.stdout); + eprintln!( + "git version: {}", + version.lines().next().unwrap_or("unknown") + ); + } + Ok(o) => eprintln!("git --version failed: {}", o.status), + Err(e) => eprintln!("git command not found: {e}"), + } + }); +} + fn print_patch_version() { static ONCE: Once = Once::new(); ONCE.call_once(|| { @@ -172,6 +263,7 @@ fn print_patch_version() { pub enum TestError { Parse(PatchSetParseError), Apply(diffy::ApplyError), + Binary(BinaryPatchParseError), } impl std::fmt::Display for TestError { @@ -179,6 +271,7 @@ impl std::fmt::Display for TestError { match self { TestError::Parse(e) => write!(f, "parse error: {e}"), TestError::Apply(e) => write!(f, "apply error: {e}"), + TestError::Binary(e) => write!(f, "binary patch error: {e}"), } } } @@ -233,12 +326,12 @@ fn copy_input_files_impl(src: &Path, dst: &Path, base: &Path, skip_extensions: & /// Apply patch using diffy to output directory. pub fn apply_diffy( in_dir: &Path, - patch: &str, + patch: &[u8], output_dir: &Path, opts: ParseOptions, strip_prefix: u32, ) -> Result<(), TestError> { - let patches: Vec<_> = PatchSet::parse(patch, opts) + let patches: Vec<_> = PatchSet::parse_bytes(patch, opts) .collect::>() .map_err(TestError::Parse)?; @@ -256,24 +349,40 @@ pub fn apply_diffy( } }; + let read_original = || { + if let Some(name) = original_name { + let original_path = in_dir.join(name); + fs::read(&original_path).unwrap_or_default() + } else { + Vec::new() + } + }; + + let write_modified = |result: &[u8]| { + let result_path = output_dir.join(target_name); + if let Some(parent) = result_path.parent() { + fs::create_dir_all(parent).unwrap(); + } + fs::write(&result_path, result).unwrap(); + }; + match file_patch.patch() { PatchKind::Text(patch) => { - let original = if let Some(name) = original_name { - let original_path = in_dir.join(name); - fs::read_to_string(&original_path).unwrap_or_else(|e| { - panic!("failed to read {}: {e}", original_path.display()) - }) - } else { - String::new() - }; - - let result = diffy::apply(&original, patch).map_err(TestError::Apply)?; - - let result_path = output_dir.join(target_name); - if let Some(parent) = result_path.parent() { - fs::create_dir_all(parent).unwrap(); - } - fs::write(&result_path, result.as_bytes()).unwrap(); + let original = read_original(); + + let result = diffy::apply_bytes(&original, patch).map_err(TestError::Apply)?; + + write_modified(&result); + } + PatchKind::Binary(BinaryPatch::Marker) => { + // Dont do anything if it is just a binary patch marker. + } + PatchKind::Binary(patch) => { + let original = read_original(); + + let result = patch.apply(&original).map_err(TestError::Binary)?; + + write_modified(&result); } } } diff --git a/tests/compat/git/binary_and_text_mixed/in/foo.patch b/tests/compat/git/binary_and_text_mixed/in/foo.patch new file mode 100644 index 00000000..9ae73cb9 --- /dev/null +++ b/tests/compat/git/binary_and_text_mixed/in/foo.patch @@ -0,0 +1,16 @@ +diff --git a/image.png b/image.png +index 0dd1608e45a9c4d35bfc1e6f266a796364aa8754..bccac03558b00545e7ea8ced4a3a1ee232cc185a 100644 +GIT binary patch +literal 10 +UcmV+l0QLWgP)0RjUA1qKHQ2>`DEE&u=k + +literal 0 +KcmV+b0RR6000031 + diff --git a/tests/compat/git/binary_literal/out/small.bin b/tests/compat/git/binary_literal/out/small.bin new file mode 100644 index 00000000..df93f5f3 Binary files /dev/null and b/tests/compat/git/binary_literal/out/small.bin differ diff --git a/tests/compat/git/binary_literal_wrong_original/in/file.bin b/tests/compat/git/binary_literal_wrong_original/in/file.bin new file mode 100644 index 00000000..605cc407 --- /dev/null +++ b/tests/compat/git/binary_literal_wrong_original/in/file.bin @@ -0,0 +1 @@ +WRONG CONTENT - not matching the hash in patch \ No newline at end of file diff --git a/tests/compat/git/binary_literal_wrong_original/in/foo.patch b/tests/compat/git/binary_literal_wrong_original/in/foo.patch new file mode 100644 index 00000000..d3e39d97 --- /dev/null +++ b/tests/compat/git/binary_literal_wrong_original/in/foo.patch @@ -0,0 +1,8 @@ +diff --git a/file.bin b/file.bin +index df93f5f3f72487244976c34e85525cf445016566..a8a025a84f32c7552841b683361d03d3e171294d 100644 +GIT binary patch +literal 10 +UcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k + +literal 10 +UcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k diff --git a/tests/compat/git/binary_literal_wrong_original/out/file.bin b/tests/compat/git/binary_literal_wrong_original/out/file.bin new file mode 100644 index 00000000..df93f5f3 Binary files /dev/null and b/tests/compat/git/binary_literal_wrong_original/out/file.bin differ diff --git a/tests/compat/git/binary_mixed_delta_literal/in/favicon.png b/tests/compat/git/binary_mixed_delta_literal/in/favicon.png new file mode 100644 index 00000000..5109c1de Binary files /dev/null and b/tests/compat/git/binary_mixed_delta_literal/in/favicon.png differ diff --git a/tests/compat/git/binary_mixed_delta_literal/in/foo.patch b/tests/compat/git/binary_mixed_delta_literal/in/foo.patch new file mode 100644 index 00000000..7f52479d --- /dev/null +++ b/tests/compat/git/binary_mixed_delta_literal/in/foo.patch @@ -0,0 +1,86 @@ +diff --git a/favicon.png b/favicon.png +index 5109c1de8bea744180448b347fb44ddef23b90f4..69b8613ce1506e1c92b864f91cc46df06348c62b 100644 +GIT binary patch +delta 1115 +zcmV-h1f=`t7Uc+#8Gi%-007x@vVQ;o00d`2O+f$vv5yP +zfP?@5`Tzg`fam}Kbua(`>RI+y?e7jT@qQ9J+u00Lr5M??VshmXv^00009a7bBm +z000DZ000DZ0m8#+P5=M^2XskIMF-{q4Gj?pmKmEP0008eNq +zS*l<~QA$1JAY?<@4F!`%H{0#bjE9}fZuU?Ayt&Np_x{ZHeczk+=q00aIO{BOMm>NW +zGb^R2YFbUrw129ilv&5_?R*#op%sCs1zv6w<_>da(*43C8z5GS?N}X +z{^V`QxQx)j*!`A8b;E|$ExHA=4hUt88|nlQ7tEk9hz=YqEbOPg@0Wz`s +zmN!~(9dV=u?I~Mvh)g?mp_)jf?D@Nr{DQJb>GW45GWc;+ig_8yt0(H +zchI)mFn`!(GAY|t|C#flvV9n{2WCVfKibO<1Qi=LY`EQ$z;UqGqDW*0&cq!s6My4; +zPXgEE5_G3UA~k24J=gwQb;>^P#8wX*Y*kE)MBeQ{FfT7}!Y4_A1FwN^c9)O_GgkHj~RdK~NC^L5i~IUXh_ +z95o@^6-U;DqjIVuk>ys^VPVp56?GT72DqTE5?dt$r77DZ?T}ZyX3UaF&vYMYG>7U( +z=6{7=Hn2lG;1!X1AGZ(HW&;lyamBia;`62jOIGoOLtacMykp3iKdiS0Xh_|vJ_mY~ +zb4{&%v9!zfYAbcDwo>=i)=iqrbxKXm)PpS7rrhUx{~ymQhCvo@p6LJp03~!qSaf7z +zbY(hYa%Ew3WdJfTGBPbNF)cAOR53F;F@HBYFgPnPFgh?WYdBZ^0000bbVXQnWMOn= +zI&E)cX=Zrz>%8FWQhbW?9;ba!EL +zWdL_~cP?peYja~^aAhuUa%Y?FJQ@H109SfcSaechcOYjeH +zXlY1#a%EF`PE=!hYhyWNB0oL~Ja{^IZE$U6bYUQPZES9HI(R)IVPtP&WjbziI&Eci +zVJ{*ecsh7(aCB=uB3MmOAVY6*Wgs;!H7+nBJ_;Z_a%5&YQba}|cx`NMb2@TlW<4Tk +zbaZe!FE4j@cP@7`E^l&YFEKeeIWI6WFETPMa%5&Lb9rubVR$WWb0Z=?3Lqdna%5&Y +zL}hbha%pgMX>V>Ia%5&YVPbD}bUh*>3LqdLAb4$TZgVKLZ*V;#XmoUNIxjD7b1q?IZ(?OG +zV{dIQaA>gzG9n5fARu&UW@b8AQe|^*Y;|;LZ*DyzH!?0T +zA_^cNAarSFW;$6?Wpi(Ab#!TOZapG5GA=M83LqdLaA4UZB6CtlLLf;+LpCuvHa0CXE-@ksARr)k +zZE!kGZ)9m^c|>7!Wj!J?FfuSLFgGnRFjO%&Iy5pmFf}bOH##sdA_^cNAb4$XI!$GC +zVPs)+VMJkcWj!J?FfuSLFgGnRFjO%&Iy5pmFf}bOH##sdB0dTrARs(=ZE#IZI!I}A +zbZ>HbJ_;ZpARs()WM(>3WpO?VARr(hAUtwpW;$$X3LqdLARr)fbVYV_I$>jUX>V>l +zB5-nVWOZX@WFiV6ARr(hAais@c62&(Z)S9NVRB_bXJu}5Jt9G7W@&C|ba@~|Wpim~ +zZe?;HC{1BkLr`;4M?xS;MME|*IW{&eGA=Oy04R}lk;GOVI$ud@Qx+*L$C!pq+mEwKumw3~KnQ4h_;;k4&i4exmIHQaZ +zqL)FLLv#_OTUJ!a@A=K2WJz)rnKf3?StLsilTJ~WrFvRoM)b6dJgav9|Mu0$^aY*j +z-Z0cWS=L)Sc(&ks)3QDfE$jOTsvhB@P|9CAfPr_>H%nSz9~#!-?6RaKci?;jS{}57 +zp7@oj#NC+;yq@B{6@$N$x+0n`AdZ9EPrONx`oPL8d^SdIhl+lpQ;W@unwKsRpO +zc#|V9Z5{cG8F|O!TM*a<%mH87Q8-w?emyNME%f#E;q&>pbLS2d6B8Uec8uxiX~xIL +zQ^j*gMBf4I1foDBQ{-RT#+^tE!_dKl2X*u2P0i2GOOo#2zpwoKe3h1#>dBKQO5*Y3 +z$J)PtzYN20XJZcd;!TDP;60!s^KAyZ*Q-sfow1`GNR3!H+vQ6&eW(=@9S?8tp-3K5WupmOjqW>G);Ew*g<1s +zBYXDj!LlsE;V=~y6+C_VG%5Dd&U7sG>@MJY7O&;n&z@CjX{nlx}S9Ri@dyqq&<&d}A>#oXN73Y%R7yqlE$CFylTLxTnf2c5{gGLQ`& +zIdVj?Sj^FL>((vR)YPQPbR>3Yr1gZLaJO0X!YUx +zE2XA(xfmWEroMjRXwcu^&%nTdThy`bTtRblbH)s`x3?=C4l5iEYiMXl`}Xbg%tR;@ +za`aSJSG)Pn6*Q3;CIo2j-n|(eaCbp88dY&|ahhCHQyXdqOH;gI!vu@%7>Cz=h +zQb$L}QqQgg@YxQ01I)WCWGUFadpE0Bug+F{9%uzf{uF)%UIQ&ji9jGgVPRoLO-Z9` +z)~w<3<;$EteVWP1$ptcFV`D_4(bP{r0>8O+O)YsRi&dyFfd-g1? +zuNTAk__(fKy{d_c3EjJQPpej~N=+C +z_VnHZ85ff*DC1)CCKx34yZ>!9ONDzmeMRQwwANG8TfpZmT+dtW=Vh(u{{qBcL;Juw +R3Jw4O002ovPDHLkV1icrt||Zk + diff --git a/tests/compat/git/binary_mixed_delta_literal/out/favicon.png b/tests/compat/git/binary_mixed_delta_literal/out/favicon.png new file mode 100644 index 00000000..69b8613c Binary files /dev/null and b/tests/compat/git/binary_mixed_delta_literal/out/favicon.png differ diff --git a/tests/compat/git/fail_both_devnull/in/foo.patch b/tests/compat/git/fail_both_devnull/in/foo.patch new file mode 100644 index 00000000..26b28273 --- /dev/null +++ b/tests/compat/git/fail_both_devnull/in/foo.patch @@ -0,0 +1,6 @@ +diff --git a/foo b/foo +--- /dev/null ++++ /dev/null +@@ -1 +1 @@ +-old ++new diff --git a/tests/compat/git/format_patch_diff_in_message/in/file.txt b/tests/compat/git/format_patch_diff_in_message/in/file.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/format_patch_diff_in_message/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/format_patch_diff_in_message/in/foo.patch b/tests/compat/git/format_patch_diff_in_message/in/foo.patch new file mode 100644 index 00000000..ce333713 --- /dev/null +++ b/tests/compat/git/format_patch_diff_in_message/in/foo.patch @@ -0,0 +1,20 @@ +From 8a14b135fe7ba10bab09a77c4a687faaf1d92a26 Mon Sep 17 00:00:00 2001 +From: Test +Date: Wed, 1 Jan 2020 00:00:00 +0000 +Subject: [PATCH] Fix diff --git parsing bug + +The line `diff --git a/x b/x` in messages was incorrectly parsed. +--- + file.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/file.txt b/file.txt +index 3367afd..3e75765 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new +-- +2.52.0 + diff --git a/tests/compat/git/format_patch_diff_in_message/out/file.txt b/tests/compat/git/format_patch_diff_in_message/out/file.txt new file mode 100644 index 00000000..3e757656 --- /dev/null +++ b/tests/compat/git/format_patch_diff_in_message/out/file.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/format_patch_multiple_separators/in/file.txt b/tests/compat/git/format_patch_multiple_separators/in/file.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/format_patch_multiple_separators/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/format_patch_multiple_separators/in/foo.patch b/tests/compat/git/format_patch_multiple_separators/in/foo.patch new file mode 100644 index 00000000..a0da5b94 --- /dev/null +++ b/tests/compat/git/format_patch_multiple_separators/in/foo.patch @@ -0,0 +1,21 @@ +From 6bfbbfa49a16bb8173145a933fe5ad918ad48a31 Mon Sep 17 00:00:00 2001 +From: Test +Date: Wed, 1 Jan 2020 00:00:00 +0000 +Subject: [PATCH] Add content with --- markers + +--- + file.txt | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/file.txt b/file.txt +index 3367afd..c4d4ea8 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1,3 @@ +-old ++--- ++new ++--- +-- +2.52.0 + diff --git a/tests/compat/git/format_patch_multiple_separators/out/file.txt b/tests/compat/git/format_patch_multiple_separators/out/file.txt new file mode 100644 index 00000000..c4d4ea8d --- /dev/null +++ b/tests/compat/git/format_patch_multiple_separators/out/file.txt @@ -0,0 +1,3 @@ +--- +new +--- diff --git a/tests/compat/git/format_patch_preamble/in/file.txt b/tests/compat/git/format_patch_preamble/in/file.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/format_patch_preamble/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/format_patch_preamble/in/foo.patch b/tests/compat/git/format_patch_preamble/in/foo.patch new file mode 100644 index 00000000..9d904250 --- /dev/null +++ b/tests/compat/git/format_patch_preamble/in/foo.patch @@ -0,0 +1,20 @@ +From ddbc9053359329dd016ed89f0d6e460b3b8ff5e3 Mon Sep 17 00:00:00 2001 +From: Test +Date: Wed, 1 Jan 2020 00:00:00 +0000 +Subject: [PATCH] Add new content + +This is the commit body. +--- + file.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/file.txt b/file.txt +index 3367afd..3e75765 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new +-- +2.52.0 + diff --git a/tests/compat/git/format_patch_preamble/out/file.txt b/tests/compat/git/format_patch_preamble/out/file.txt new file mode 100644 index 00000000..3e757656 --- /dev/null +++ b/tests/compat/git/format_patch_preamble/out/file.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/format_patch_signature/in/file.txt b/tests/compat/git/format_patch_signature/in/file.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/format_patch_signature/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/format_patch_signature/in/foo.patch b/tests/compat/git/format_patch_signature/in/foo.patch new file mode 100644 index 00000000..7614287a --- /dev/null +++ b/tests/compat/git/format_patch_signature/in/foo.patch @@ -0,0 +1,19 @@ +From b3bb3125eff3d2648f15af2a6e0cdcdf6ad8fce1 Mon Sep 17 00:00:00 2001 +From: Test +Date: Wed, 1 Jan 2020 00:00:00 +0000 +Subject: [PATCH] modify + +--- + file.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/file.txt b/file.txt +index 3367afd..3e75765 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new +-- +2.52.0 + diff --git a/tests/compat/git/format_patch_signature/out/file.txt b/tests/compat/git/format_patch_signature/out/file.txt new file mode 100644 index 00000000..3e757656 --- /dev/null +++ b/tests/compat/git/format_patch_signature/out/file.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/junk_between_files/in/bar.txt b/tests/compat/git/junk_between_files/in/bar.txt new file mode 100644 index 00000000..601d8ee1 --- /dev/null +++ b/tests/compat/git/junk_between_files/in/bar.txt @@ -0,0 +1 @@ +bar line1 diff --git a/tests/compat/git/junk_between_files/in/foo.patch b/tests/compat/git/junk_between_files/in/foo.patch new file mode 100644 index 00000000..dbc59398 --- /dev/null +++ b/tests/compat/git/junk_between_files/in/foo.patch @@ -0,0 +1,17 @@ +diff --git a/foo.txt b/foo.txt +index 1234567..89abcdef 100644 +--- a/foo.txt ++++ b/foo.txt +@@ -1 +1 @@ +-foo line1 ++FOO LINE1 +JUNK BETWEEN FILES!!!! +This preamble text should be ignored +by both git apply and diffy +diff --git a/bar.txt b/bar.txt +index 1234567..89abcdef 100644 +--- a/bar.txt ++++ b/bar.txt +@@ -1 +1 @@ +-bar line1 ++BAR LINE1 diff --git a/tests/compat/git/junk_between_files/in/foo.txt b/tests/compat/git/junk_between_files/in/foo.txt new file mode 100644 index 00000000..b11358e1 --- /dev/null +++ b/tests/compat/git/junk_between_files/in/foo.txt @@ -0,0 +1 @@ +foo line1 diff --git a/tests/compat/git/junk_between_files/out/bar.txt b/tests/compat/git/junk_between_files/out/bar.txt new file mode 100644 index 00000000..76c036d0 --- /dev/null +++ b/tests/compat/git/junk_between_files/out/bar.txt @@ -0,0 +1 @@ +BAR LINE1 diff --git a/tests/compat/git/junk_between_files/out/foo.txt b/tests/compat/git/junk_between_files/out/foo.txt new file mode 100644 index 00000000..787bc665 --- /dev/null +++ b/tests/compat/git/junk_between_files/out/foo.txt @@ -0,0 +1 @@ +FOO LINE1 diff --git a/tests/compat/git/junk_between_hunks/in/file.txt b/tests/compat/git/junk_between_hunks/in/file.txt new file mode 100644 index 00000000..822aed3f --- /dev/null +++ b/tests/compat/git/junk_between_hunks/in/file.txt @@ -0,0 +1,9 @@ +line1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 diff --git a/tests/compat/git/junk_between_hunks/in/foo.patch b/tests/compat/git/junk_between_hunks/in/foo.patch new file mode 100644 index 00000000..242e959c --- /dev/null +++ b/tests/compat/git/junk_between_hunks/in/foo.patch @@ -0,0 +1,15 @@ +diff --git a/file.txt b/file.txt +index 1234567..89abcdef 100644 +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,3 @@ +-line1 ++LINE1 + line2 + line3 +JUNK BETWEEN HUNKS +@@ -7,3 +7,3 @@ + line7 +-line8 ++LINE8 + line9 diff --git a/tests/compat/git/junk_between_hunks/out/file.txt b/tests/compat/git/junk_between_hunks/out/file.txt new file mode 100644 index 00000000..2e5e454d --- /dev/null +++ b/tests/compat/git/junk_between_hunks/out/file.txt @@ -0,0 +1,9 @@ +LINE1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 diff --git a/tests/compat/git/mod.rs b/tests/compat/git/mod.rs new file mode 100644 index 00000000..335a63c6 --- /dev/null +++ b/tests/compat/git/mod.rs @@ -0,0 +1,214 @@ +//! Git compatibility tests. See [`crate`] for test structure and usage. +//! +//! Focus areas: +//! +//! - `diff --git` path parsing edge cases (quotes, spaces, ambiguous prefixes) +//! - `git format-patch` email format (preamble/signature stripping) +//! - Agreement between diffy and `git apply` + +use crate::common::Case; + +#[test] +fn path_no_prefix() { + Case::git("path_no_prefix").run(); +} + +#[test] +fn path_quoted_escapes() { + Case::git("path_quoted_escapes").strip(1).run(); +} + +// Git uses C-style named escapes (\a, \b, \f, \v) for certain control +// characters in quoted filenames. Both `git apply` and GNU patch decode +// these correctly. +// +// Observed with git 2.53.0: +// $ printf 'x' > "$(printf 'bel\a')" && git add -A +// $ git diff --cached | grep '+++' +// +++ "b/bel\a" +// +// diffy now decodes these correctly. +#[test] +fn path_quoted_named_escape() { + Case::git("path_quoted_named_escape").strip(1).run(); +} + +// Git uses 3-digit octal escapes (\000-\377) for bytes that don't have +// a named escape. Both `git apply` and GNU patch decode these correctly. +// +// Observed with git 2.53.0: +// $ printf 'x' > "$(printf 'tl\033')" && git add -A +// $ git diff --cached | grep '+++' +// +++ "b/tl\033" +// +// Found via full-history replay test against llvm/llvm-project +// (commits 17af06ba..229c95ab, 6c031780..0683a1e5). +#[test] +fn path_quoted_octal_escape() { + Case::git("path_quoted_octal_escape").strip(1).run(); +} + +#[test] +fn path_with_spaces() { + Case::git("path_with_spaces").strip(1).run(); +} + +#[test] +fn path_containing_space_b() { + Case::git("path_containing_space_b").strip(1).run(); +} + +#[test] +fn format_patch_preamble() { + // Ambiguous: where does preamble end? First `\n---\n` - verify matches git + Case::git("format_patch_preamble").strip(1).run(); +} + +#[test] +fn format_patch_diff_in_message() { + // `diff --git` in commit message must NOT trigger early parsing + Case::git("format_patch_diff_in_message").strip(1).run(); +} + +#[test] +fn format_patch_multiple_separators() { + // Git uses first `\n---\n` as separator (observed git mailinfo behavior) + Case::git("format_patch_multiple_separators").strip(1).run(); +} + +#[test] +fn format_patch_signature() { + // Ambiguous: `\n-- \n` could appear in patch content - verify matches git + Case::git("format_patch_signature").strip(1).run(); +} + +#[test] +fn nested_diff_signature() { + // Patch that deletes a diff file containing `-- ` patterns within its content, + // followed by a real email signature at the end. + // + // Tests that we correctly distinguish between: + // - `-- ` appearing as patch content (from inner diff's empty context lines) + // - `-- ` appearing as the actual email signature separator + // + // Both git apply and GNU patch handle this correctly. + Case::git("nested_diff_signature").strip(1).run(); +} + +#[test] +fn path_ambiguous_suffix() { + // Multiple valid splits in `diff --git` line; algorithm picks longest common suffix. + // Tests the pathological case from parse.rs comments where custom prefix + // creates `src/foo.rs src/foo.rs src/foo.rs src/foo.rs` - verify matches git. + Case::git("path_ambiguous_suffix").strip(1).run(); +} + +// Both --- and +++ point to /dev/null. +// git apply rejects: "dev/null: No such file or directory" +#[test] +fn fail_both_devnull() { + Case::git("fail_both_devnull") + .strip(1) + .expect_success(false) + .run(); +} + +// Single-file patch with junk between hunks. +// +// - git apply: errors ("patch fragment without header") +// - diffy: succeeds, ignores trailing junk (matches GNU patch behavior) +#[test] +fn junk_between_hunks() { + Case::git("junk_between_hunks") + .strip(1) + .expect_compat(false) + .run(); +} + +// Mixed binary and text patch. +// +// Both git apply and diffy should apply both the binary and text changes. +#[test] +fn binary_and_text_mixed() { + Case::git("binary_and_text_mixed").strip(1).run(); +} + +// Binary patch in literal format (new file creation). +#[test] +#[cfg(feature = "binary")] +fn binary_literal() { + Case::git("binary_literal").strip(1).run(); +} + +// Binary patch in delta format (modify existing file). +#[test] +#[cfg(feature = "binary")] +fn binary_delta() { + Case::git("binary_delta").strip(1).run(); +} + +// Binary literal patch applied to wrong original content. +// +// This documents a behavioral difference: +// - diffy: succeeds (skips validation, ignores original for literal format) +// - git: fails (validates original content via index hash before applying) +// +// diffy's behavior is intentional - we don't have access to git's object database +// to verify hashes, and for literal format the original content isn't needed anyway. +#[test] +#[cfg(feature = "binary")] +fn binary_literal_wrong_original() { + Case::git("binary_literal_wrong_original") + .strip(1) + .expect_compat(false) + .run(); +} + +// Binary delta patch applied to wrong original content. +// +// Both diffy and git fail, but for different reasons: +// - diffy: fails because delta instructions reference wrong offsets/sizes +// - git: fails because index hash doesn't match before even trying to apply +// +// This test verifies diffy correctly rejects invalid delta applications. +#[test] +#[cfg(feature = "binary")] +fn binary_delta_wrong_original() { + Case::git("binary_delta_wrong_original") + .strip(1) + .expect_success(false) + .run(); +} + +// Binary patch with mixed delta/literal format. +// +// Git can choose different encodings for forward and reverse transformations +// based on which is more efficient. This patch has: +// - forward (original -> modified): delta +// - reverse (modified -> original): literal +// +// From rust-lang/rust@ad46af24 (favicon-32x32.png update). +#[test] +#[cfg(feature = "binary")] +fn binary_mixed_delta_literal() { + Case::git("binary_mixed_delta_literal").strip(1).run(); +} + +// Patch with non-UTF-8 bytes (0x80, 0xff) in hunk content. +// Both git apply and diffy handle raw bytes correctly. +#[test] +fn non_utf8_hunk_content() { + Case::git("non_utf8_hunk_content").strip(1).run(); +} + +// Multi-file patch with junk/preamble text between different files. +// +// git apply behavior: Ignores content between `diff --git` boundaries. +// In GitDiff mode, splitting occurs at `diff --git`, so junk between +// files becomes trailing content of the previous chunk (harmless). +// +// This is different from junk between HUNKS of the same file (which fails). +#[test] +fn junk_between_files() { + Case::git("junk_between_files").strip(1).run(); +} diff --git a/tests/compat/git/nested_diff_signature/in/example.rs b/tests/compat/git/nested_diff_signature/in/example.rs new file mode 100644 index 00000000..8f3b7ef1 --- /dev/null +++ b/tests/compat/git/nested_diff_signature/in/example.rs @@ -0,0 +1 @@ +fn foo() {} diff --git a/tests/compat/git/nested_diff_signature/in/foo.patch b/tests/compat/git/nested_diff_signature/in/foo.patch new file mode 100644 index 00000000..5d876c61 --- /dev/null +++ b/tests/compat/git/nested_diff_signature/in/foo.patch @@ -0,0 +1,25 @@ +diff --git a/mir-test.diff b/mir-test.diff +deleted file mode 100644 +index 98012d7..0000000 +--- a/mir-test.diff ++++ /dev/null +@@ -1,12 +0,0 @@ +-- // MIR before +-+ // MIR after +- +- fn opt() { +- bb0: { +-- nop; +-- } +-- +-- bb1: { +-- nop; +- } +- } +diff --git a/example.rs b/example.rs +index 8f3b7ef..2a40712 100644 +--- a/example.rs ++++ b/example.rs +@@ -1 +1,2 @@ + fn foo() {} ++fn bar() {} diff --git a/tests/compat/git/nested_diff_signature/in/mir-test.diff b/tests/compat/git/nested_diff_signature/in/mir-test.diff new file mode 100644 index 00000000..98012d7e --- /dev/null +++ b/tests/compat/git/nested_diff_signature/in/mir-test.diff @@ -0,0 +1,12 @@ +- // MIR before ++ // MIR after + + fn opt() { + bb0: { +- nop; +- } +- +- bb1: { +- nop; + } + } diff --git a/tests/compat/git/nested_diff_signature/out/example.rs b/tests/compat/git/nested_diff_signature/out/example.rs new file mode 100644 index 00000000..2a40712e --- /dev/null +++ b/tests/compat/git/nested_diff_signature/out/example.rs @@ -0,0 +1,2 @@ +fn foo() {} +fn bar() {} diff --git a/tests/compat/git/nested_diff_signature/out/mir-test.diff b/tests/compat/git/nested_diff_signature/out/mir-test.diff new file mode 100644 index 00000000..e69de29b diff --git a/tests/compat/git/non_utf8_hunk_content/in/file.bin b/tests/compat/git/non_utf8_hunk_content/in/file.bin new file mode 100644 index 00000000..5ddb01f6 --- /dev/null +++ b/tests/compat/git/non_utf8_hunk_content/in/file.bin @@ -0,0 +1 @@ +hello €˙ world diff --git a/tests/compat/git/non_utf8_hunk_content/in/foo.patch b/tests/compat/git/non_utf8_hunk_content/in/foo.patch new file mode 100644 index 00000000..0c3f72f2 --- /dev/null +++ b/tests/compat/git/non_utf8_hunk_content/in/foo.patch @@ -0,0 +1,6 @@ +diff --git a/file.bin b/file.bin +--- a/file.bin ++++ b/file.bin +@@ -1 +1 @@ +-hello €˙ world ++hello €˙ universe diff --git a/tests/compat/git/non_utf8_hunk_content/out/file.bin b/tests/compat/git/non_utf8_hunk_content/out/file.bin new file mode 100644 index 00000000..715e61f2 --- /dev/null +++ b/tests/compat/git/non_utf8_hunk_content/out/file.bin @@ -0,0 +1 @@ +hello €˙ universe diff --git a/tests/compat/git/path_ambiguous_suffix/in/foo.patch b/tests/compat/git/path_ambiguous_suffix/in/foo.patch new file mode 100644 index 00000000..a6815eb7 --- /dev/null +++ b/tests/compat/git/path_ambiguous_suffix/in/foo.patch @@ -0,0 +1,3 @@ +diff --git src/foo.rs src/foo.rs src/foo.rs src/foo.rs +new file mode 100644 +index 0000000..e69de29 diff --git a/tests/compat/git/path_ambiguous_suffix/out/foo.rs src/foo.rs b/tests/compat/git/path_ambiguous_suffix/out/foo.rs src/foo.rs new file mode 100644 index 00000000..e69de29b diff --git a/tests/compat/git/path_containing_space_b/in/foo b/baz.txt b/tests/compat/git/path_containing_space_b/in/foo b/baz.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/path_containing_space_b/in/foo b/baz.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/path_containing_space_b/in/foo.patch b/tests/compat/git/path_containing_space_b/in/foo.patch new file mode 100644 index 00000000..15c6fca2 --- /dev/null +++ b/tests/compat/git/path_containing_space_b/in/foo.patch @@ -0,0 +1,7 @@ +diff --git a/foo b/baz.txt b/foo b/baz.txt +index 3367afd..3e75765 100644 +--- a/foo b/baz.txt ++++ b/foo b/baz.txt +@@ -1 +1 @@ +-old ++new diff --git a/tests/compat/git/path_containing_space_b/out/foo b/baz.txt b/tests/compat/git/path_containing_space_b/out/foo b/baz.txt new file mode 100644 index 00000000..3e757656 --- /dev/null +++ b/tests/compat/git/path_containing_space_b/out/foo b/baz.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/path_no_prefix/in/file.txt b/tests/compat/git/path_no_prefix/in/file.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/path_no_prefix/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/path_no_prefix/in/foo.patch b/tests/compat/git/path_no_prefix/in/foo.patch new file mode 100644 index 00000000..5e2a9f8f --- /dev/null +++ b/tests/compat/git/path_no_prefix/in/foo.patch @@ -0,0 +1,7 @@ +diff --git file.txt file.txt +index 3367afd..3e75765 100644 +--- file.txt ++++ file.txt +@@ -1 +1 @@ +-old ++new diff --git a/tests/compat/git/path_no_prefix/out/file.txt b/tests/compat/git/path_no_prefix/out/file.txt new file mode 100644 index 00000000..3e757656 --- /dev/null +++ b/tests/compat/git/path_no_prefix/out/file.txt @@ -0,0 +1 @@ +new diff --git "a/tests/compat/git/path_quoted_escapes/in/foo\tbar.txt" "b/tests/compat/git/path_quoted_escapes/in/foo\tbar.txt" new file mode 100644 index 00000000..3367afdb --- /dev/null +++ "b/tests/compat/git/path_quoted_escapes/in/foo\tbar.txt" @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/path_quoted_escapes/in/foo.patch b/tests/compat/git/path_quoted_escapes/in/foo.patch new file mode 100644 index 00000000..26692238 --- /dev/null +++ b/tests/compat/git/path_quoted_escapes/in/foo.patch @@ -0,0 +1,7 @@ +diff --git "a/foo\tbar.txt" "b/foo\tbar.txt" +index 3367afd..3e75765 100644 +--- "a/foo\tbar.txt" ++++ "b/foo\tbar.txt" +@@ -1 +1 @@ +-old ++new diff --git "a/tests/compat/git/path_quoted_escapes/out/foo\tbar.txt" "b/tests/compat/git/path_quoted_escapes/out/foo\tbar.txt" new file mode 100644 index 00000000..3e757656 --- /dev/null +++ "b/tests/compat/git/path_quoted_escapes/out/foo\tbar.txt" @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/path_quoted_named_escape/in/foo.patch b/tests/compat/git/path_quoted_named_escape/in/foo.patch new file mode 100644 index 00000000..cfd3f562 --- /dev/null +++ b/tests/compat/git/path_quoted_named_escape/in/foo.patch @@ -0,0 +1,6 @@ +diff --git "a/bel\a" "b/bel\a" +new file mode 100644 +--- /dev/null ++++ "b/bel\a" +@@ -0,0 +1 @@ ++hello diff --git "a/tests/compat/git/path_quoted_named_escape/out/bel\a" "b/tests/compat/git/path_quoted_named_escape/out/bel\a" new file mode 100644 index 00000000..ce013625 --- /dev/null +++ "b/tests/compat/git/path_quoted_named_escape/out/bel\a" @@ -0,0 +1 @@ +hello diff --git a/tests/compat/git/path_quoted_octal_escape/in/foo.patch b/tests/compat/git/path_quoted_octal_escape/in/foo.patch new file mode 100644 index 00000000..5dda9ec6 --- /dev/null +++ b/tests/compat/git/path_quoted_octal_escape/in/foo.patch @@ -0,0 +1,6 @@ +diff --git "a/tl\033" "b/tl\033" +new file mode 100644 +--- /dev/null ++++ "b/tl\033" +@@ -0,0 +1 @@ ++hello diff --git "a/tests/compat/git/path_quoted_octal_escape/out/tl\033" "b/tests/compat/git/path_quoted_octal_escape/out/tl\033" new file mode 100644 index 00000000..ce013625 --- /dev/null +++ "b/tests/compat/git/path_quoted_octal_escape/out/tl\033" @@ -0,0 +1 @@ +hello diff --git a/tests/compat/git/path_with_spaces/in/foo bar.txt b/tests/compat/git/path_with_spaces/in/foo bar.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/path_with_spaces/in/foo bar.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/path_with_spaces/in/foo.patch b/tests/compat/git/path_with_spaces/in/foo.patch new file mode 100644 index 00000000..b3d1d463 --- /dev/null +++ b/tests/compat/git/path_with_spaces/in/foo.patch @@ -0,0 +1,7 @@ +diff --git a/foo bar.txt b/foo bar.txt +index 3367afd..3e75765 100644 +--- a/foo bar.txt ++++ b/foo bar.txt +@@ -1 +1 @@ +-old ++new diff --git a/tests/compat/git/path_with_spaces/out/foo bar.txt b/tests/compat/git/path_with_spaces/out/foo bar.txt new file mode 100644 index 00000000..3e757656 --- /dev/null +++ b/tests/compat/git/path_with_spaces/out/foo bar.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/gnu_patch/mod.rs b/tests/compat/gnu_patch/mod.rs index fe59fcb7..643d16e9 100644 --- a/tests/compat/gnu_patch/mod.rs +++ b/tests/compat/gnu_patch/mod.rs @@ -151,6 +151,13 @@ fn junk_between_hunks() { Case::gnu_patch("junk_between_hunks").run(); } +// Patch with non-UTF-8 bytes (0x80, 0xff) in hunk content. +// Both GNU patch and diffy handle raw bytes correctly. +#[test] +fn non_utf8_hunk_content() { + Case::gnu_patch("non_utf8_hunk_content").run(); +} + // Failure cases #[test] diff --git a/tests/compat/gnu_patch/non_utf8_hunk_content/in/file.bin b/tests/compat/gnu_patch/non_utf8_hunk_content/in/file.bin new file mode 100644 index 00000000..5ddb01f6 --- /dev/null +++ b/tests/compat/gnu_patch/non_utf8_hunk_content/in/file.bin @@ -0,0 +1 @@ +hello €˙ world diff --git a/tests/compat/gnu_patch/non_utf8_hunk_content/in/foo.patch b/tests/compat/gnu_patch/non_utf8_hunk_content/in/foo.patch new file mode 100644 index 00000000..10e99970 --- /dev/null +++ b/tests/compat/gnu_patch/non_utf8_hunk_content/in/foo.patch @@ -0,0 +1,5 @@ +--- file.bin ++++ file.bin +@@ -1 +1 @@ +-hello €˙ world ++hello €˙ universe diff --git a/tests/compat/gnu_patch/non_utf8_hunk_content/out/file.bin b/tests/compat/gnu_patch/non_utf8_hunk_content/out/file.bin new file mode 100644 index 00000000..715e61f2 --- /dev/null +++ b/tests/compat/gnu_patch/non_utf8_hunk_content/out/file.bin @@ -0,0 +1 @@ +hello €˙ universe diff --git a/tests/compat/main.rs b/tests/compat/main.rs index 8faf9eac..2c0afe7c 100644 --- a/tests/compat/main.rs +++ b/tests/compat/main.rs @@ -20,28 +20,30 @@ //! //! ```sh //! # Run all compat tests -//! cargo test --test compat +//! cargo test --test compat -F binary //! //! # Run with reference tool comparison (CI mode) -//! CI=1 cargo test --test compat +//! CI=1 cargo test --test compat -F binary //! //! # For Nix users, run this to ensure you have GNU patch -//! CI=1 nix shell nixpkgs#gnupatch -c cargo test --test compat +//! CI=1 nix shell nixpkgs#gnupatch -c cargo test --test compat -F binary //! ``` //! //! ## Regenerating snapshots //! //! ```sh -//! SNAPSHOTS=overwrite cargo test --test compat +//! SNAPSHOTS=overwrite cargo test --test compat -F binary //! ``` //! //! ## Adding new test cases //! //! 1. Create `case_name/in/` with input file(s) and `foo.patch` -//! 2. Run `SNAPSHOTS=overwrite cargo test --test compat` to generate `out/` -//! 3. Add `#[test] fn case_name() { Case::gnu_patch(...).run(); }` in the module +//! 2. Run `SNAPSHOTS=overwrite cargo test --test compat -F binary` to generate `out/` +//! 3. Add `#[test] fn case_name() { Case::{gnu_patch,git}(...).run(); }` in the module //! //! For failure tests, use `.expect_success(false)` and skip step 2. +//! For intentional compat divergence, use `.expect_compat(false)`. mod common; +mod git; mod gnu_patch; diff --git a/tests/replay.rs b/tests/replay.rs index 7eaa56ba..cbf07acf 100644 --- a/tests/replay.rs +++ b/tests/replay.rs @@ -6,7 +6,7 @@ //! ## Usage //! //! ```console -//! $ cargo test --test replay -- --ignored --nocapture +//! $ cargo test --test replay -F binary -- --ignored --nocapture //! ``` //! //! ## Environment Variables @@ -18,8 +18,7 @@ //! * A range (e.g., `abc123..def456`) for a specific commit range //! //! Defaults to 200. Use `0` to verify entire history. -//! * `DIFFY_TEST_PARSE_MODE`: Parse mode to use. -//! Currently only `unidiff` is supported. +//! * `DIFFY_TEST_PARSE_MODE`: Parse mode to use (`unidiff` or `gitdiff`). //! Defaults to `unidiff`. //! //! ## Requirements @@ -60,7 +59,10 @@ use std::{ sync::Mutex, }; -use diffy::patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet}; +use diffy::{ + binary::BinaryPatch, + patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet}, +}; use rayon::prelude::*; /// Persistent `git cat-file --batch` process for fast object lookups. @@ -150,23 +152,20 @@ impl CatFile { Some(buf) } - - /// Like [`CatFile::get`] but returns only UTF-8 string. - fn get_text(&mut self, rev: &str, path: &str) -> Option { - self.get(rev, path).and_then(|b| String::from_utf8(b).ok()) - } } /// Local enum for test configuration (maps to ParseOptions). #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum TestMode { UniDiff, + GitDiff, } impl From for ParseOptions { fn from(value: TestMode) -> Self { match value { TestMode::UniDiff => ParseOptions::unidiff(), + TestMode::GitDiff => ParseOptions::gitdiff(), } } } @@ -228,11 +227,12 @@ fn test_mode() -> TestMode { }; match val.trim().to_lowercase().as_str() { "unidiff" => TestMode::UniDiff, - _ => panic!("invalid DIFFY_TEST_PARSE_MODE='{val}': expected 'unidiff'"), + "gitdiff" => TestMode::GitDiff, + _ => panic!("invalid DIFFY_TEST_PARSE_MODE='{val}': expected 'unidiff' or 'gitdiff'"), } } -fn git(repo: &Path, args: &[&str]) -> String { +fn git_bytes(repo: &Path, args: &[&str]) -> Vec { let mut cmd = Command::new("git"); cmd.env("GIT_CONFIG_NOSYSTEM", "1"); cmd.env("GIT_CONFIG_GLOBAL", "/dev/null"); @@ -246,7 +246,11 @@ fn git(repo: &Path, args: &[&str]) -> String { panic!("git {args:?} failed: {stderr}"); } - String::from_utf8_lossy(&output.stdout).into_owned() + output.stdout +} + +fn git(repo: &Path, args: &[&str]) -> String { + String::from_utf8_lossy(&git_bytes(repo, args)).into_owned() } /// Get the list of commits from oldest to newest. @@ -323,8 +327,10 @@ fn process_commit( // UniDiff format cannot express pure renames (no ---/+++ headers). // Use `--no-renames` to represent them as delete + create instead. + // GitDiff mode uses `--binary` to get actual binary patch data. let diff_output = match mode { - TestMode::UniDiff => git(repo, &["diff", "--no-renames", parent, child]), + TestMode::UniDiff => git_bytes(repo, &["diff", "--no-renames", parent, child]), + TestMode::GitDiff => git_bytes(repo, &["diff", "--binary", parent, child]), }; if diff_output.is_empty() { @@ -373,6 +379,18 @@ fn process_commit( } text_files + type_changes } + TestMode::GitDiff => { + // With `--binary`, all files including binary ones have patch data. + let raw = git(repo, &["diff", "--raw", parent, child]); + let (mut total, mut type_changes) = (0, 0); + for line in raw.lines().filter(|l| !l.is_empty()) { + total += 1; + if is_type_change(line) { + type_changes += 1; + } + } + total + type_changes + } }; if expected_file_count == 0 { @@ -385,9 +403,10 @@ fn process_commit( }; } - let patchset: Vec<_> = match PatchSet::parse(&diff_output, mode.into()).collect() { + let patchset: Vec<_> = match PatchSet::parse_bytes(&diff_output, mode.into()).collect() { Ok(ps) => ps, Err(e) => { + let diff_output = String::from_utf8_lossy(&diff_output); panic!( "Failed to parse patch for {parent_short}..{child_short}: {e}\n\n\ Diff:\n{diff_output}" @@ -399,6 +418,7 @@ fn process_commit( // This catches both missing and spurious patches. if patchset.len() != expected_file_count { let n = patchset.len(); + let diff_output = String::from_utf8_lossy(&diff_output); panic!( "Patch count mismatch for {parent_short}..{child_short}: \ expected {expected_file_count} files, parsed {n} patches\n\n\ @@ -442,42 +462,88 @@ fn process_commit( match file_patch.patch() { PatchKind::Text(patch) => { let base_content = if let Some(path) = base_path { - let Some(content) = cat.get_text(parent, path) else { + let Some(content) = cat.get(parent, path) else { skipped += 1; continue; }; content } else { - String::new() + Vec::new() }; let expected_content = if let Some(path) = target_path { - let Some(content) = cat.get_text(child, path) else { + let Some(content) = cat.get(child, path) else { skipped += 1; continue; }; content } else { - String::new() + Vec::new() }; - let result = match diffy::apply(&base_content, patch) { + let result = match diffy::apply_bytes(&base_content, patch) { Ok(r) => r, Err(e) => { + let base_content = String::from_utf8_lossy(&base_content); panic!( "Failed to apply patch at {parent_short}..{child_short} for {desc}: {e}\n\n\ - Patch:\n{patch}\n\n\ Base content:\n{base_content}" ); } }; if result != expected_content { + let expected_content = String::from_utf8_lossy(&expected_content); + let result = String::from_utf8_lossy(&result); panic!( "Content mismatch at {parent_short}..{child_short} for {desc}\n\n\ --- Expected ---\n{expected_content}\n\n\ - --- Got ---\n{result}\n\n\ - --- Patch ---\n{patch}" + --- Got ---\n{result}" + ); + } + } + PatchKind::Binary(BinaryPatch::Marker) => { + // `Binary files differ` marker - no actual data to apply + skipped += 1; + continue; + } + PatchKind::Binary(patch) => { + // Get content as bytes + let base_content = if let Some(path) = base_path { + let Some(content) = cat.get(parent, path) else { + skipped += 1; + continue; + }; + content + } else { + Vec::new() + }; + + let expected_content = if let Some(path) = target_path { + let Some(content) = cat.get(child, path) else { + skipped += 1; + continue; + }; + content + } else { + Vec::new() + }; + + let result = match patch.apply(&base_content) { + Ok(r) => r, + Err(e) => { + panic!( + "Failed to apply binary patch at {parent_short}..{child_short} for {desc}: {e}" + ); + } + }; + + if result != expected_content { + panic!( + "Binary content mismatch at {parent_short}..{child_short} for {desc}\n\n\ + Expected {} bytes, got {} bytes", + expected_content.len(), + result.len() ); } } @@ -518,6 +584,7 @@ fn replay() { .unwrap_or_else(|| ".".to_string()); let mode_name = match mode { TestMode::UniDiff => "unidiff", + TestMode::GitDiff => "gitdiff", }; // Shared state for progress reporting