diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1c82923..2f514b37 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,19 +58,19 @@ jobs: commits: '0' - name: golang/go repo_url: https://github.com/golang/go - commits: '200' + commits: '2000' - name: git/git repo_url: https://github.com/git/git - commits: '100' + commits: '1000' - name: rust-lang/rust repo_url: https://github.com/rust-lang/rust - commits: '30' + commits: '50' - name: torvalds/linux repo_url: https://github.com/torvalds/linux - commits: '30' + commits: '40' - name: llvm/llvm-project repo_url: https://github.com/llvm/llvm-project - commits: '30' + commits: '50' name: replay (${{ matrix.name }}) uses: ./.github/workflows/replay.yml with: diff --git a/.github/workflows/replay.yml b/.github/workflows/replay.yml index a9f8f997..08384d61 100644 --- a/.github/workflows/replay.yml +++ b/.github/workflows/replay.yml @@ -44,7 +44,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - parse_mode: [unidiff] + parse_mode: [unidiff, gitdiff] name: ${{ inputs.name && matrix.parse_mode || format('{0} ({1}, {2})', inputs.repo_url, matrix.parse_mode, inputs.commits) }} steps: - uses: actions/checkout@v6 diff --git a/src/patch_set/error.rs b/src/patch_set/error.rs index ba60e861..3a8fc616 100644 --- a/src/patch_set/error.rs +++ b/src/patch_set/error.rs @@ -70,6 +70,12 @@ pub(crate) enum PatchSetParseErrorKind { /// Create patch missing modified path. CreateMissingModifiedPath, + + /// Invalid file mode string. + InvalidFileMode(String), + + /// Invalid `diff --git` path. 
+ InvalidDiffGitPath, } impl fmt::Display for PatchSetParseErrorKind { @@ -81,6 +87,8 @@ impl fmt::Display for PatchSetParseErrorKind { Self::BothDevNull => write!(f, "patch has both original and modified as /dev/null"), Self::DeleteMissingOriginalPath => write!(f, "delete patch has no original path"), Self::CreateMissingModifiedPath => write!(f, "create patch has no modified path"), + Self::InvalidFileMode(mode) => write!(f, "invalid file mode: {mode}"), + Self::InvalidDiffGitPath => write!(f, "invalid diff --git path"), } } } diff --git a/src/patch_set/mod.rs b/src/patch_set/mod.rs index 67ac1f66..e70b9dc6 100644 --- a/src/patch_set/mod.rs +++ b/src/patch_set/mod.rs @@ -15,11 +15,25 @@ use crate::utils::Text; use crate::Patch; pub use error::PatchSetParseError; +use error::PatchSetParseErrorKind; pub use parse::PatchSet; /// Options for parsing patch content. /// -/// Use [`ParseOptions::unidiff()`] to create options for the desired format. +/// Use [`ParseOptions::unidiff()`] or [`ParseOptions::gitdiff()`] +/// to create options for the desired format. +/// +/// ## Binary Files +/// +/// When parsing git diffs, binary file changes are detected by: +/// +/// * `Binary files a/path and b/path differ` (`git diff` without `--binary` flag) +/// * `GIT binary patch` (from `git diff --binary`) +/// +/// Note that this is not a documented Git behavior, +/// so the implementation here is subject to change if Git changes. +/// +/// By default, binary diffs are not decoded; a [`PatchKind::Binary`] marker is emitted instead. /// /// ## Example /// @@ -44,10 +58,13 @@ pub struct ParseOptions { pub(crate) format: Format, } -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Default)] pub(crate) enum Format { /// Standard unified diff format. + #[default] UniDiff, + /// Git extended diff format. + GitDiff, } impl ParseOptions { @@ -68,6 +85,22 @@ impl ParseOptions { format: Format::UniDiff, } } + + /// Parse as [git extended diff format][git-diff-format]. 
+ /// + /// Supports all features of [`unidiff()`](Self::unidiff) plus: + /// + /// * `diff --git` headers + /// * Extended headers (`new file mode`, `deleted file mode`, etc.) + /// * Rename/copy detection (`rename from`/`rename to`, `copy from`/`copy to`) + /// * Binary file detection (emits a marker by default) + /// + /// [git-diff-format]: https://git-scm.com/docs/diff-format + pub fn gitdiff() -> Self { + Self { + format: Format::GitDiff, + } + } } /// File mode extracted from git extended headers. @@ -83,11 +116,32 @@ pub enum FileMode { Gitlink, } +impl std::str::FromStr for FileMode { + type Err = PatchSetParseError; + + fn from_str(mode: &str) -> Result { + match mode { + "100644" => Ok(Self::Regular), + "100755" => Ok(Self::Executable), + "120000" => Ok(Self::Symlink), + "160000" => Ok(Self::Gitlink), + _ => Err(PatchSetParseErrorKind::InvalidFileMode(mode.to_owned()).into()), + } + } +} + /// The kind of patch content in a [`FilePatch`]. #[derive(Clone, PartialEq, Eq)] pub enum PatchKind<'a, T: ToOwned + ?Sized> { /// Text patch with hunks. Text(Patch<'a, T>), + /// Binary diff marker (no patch data). + /// + /// Emitted when a binary diff is detected + /// (e.g., `Binary files ... differ` or `GIT binary patch`). + /// + /// Useful when you want to avoid decoding binary diffs. + Binary, } impl std::fmt::Debug for PatchKind<'_, T> @@ -98,6 +152,7 @@ where fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { PatchKind::Text(patch) => f.debug_tuple("Text").field(patch).finish(), + PatchKind::Binary => f.write_str("Binary"), } } } @@ -107,8 +162,14 @@ impl<'a, T: ToOwned + ?Sized> PatchKind<'a, T> { pub fn as_text(&self) -> Option<&Patch<'a, T>> { match self { PatchKind::Text(patch) => Some(patch), + PatchKind::Binary => None, } } + + /// Returns `true` if this is a binary diff. + pub fn is_binary(&self) -> bool { + matches!(self, PatchKind::Binary) + } } /// A single file's patch with operation metadata. 
@@ -154,6 +215,19 @@ impl<'a, T: ToOwned + ?Sized> FilePatch<'a, T> { } } + fn new_binary( + operation: FileOperation<'a, T>, + old_mode: Option, + new_mode: Option, + ) -> Self { + Self { + operation, + kind: PatchKind::Binary, + old_mode, + new_mode, + } + } + /// Returns the file operation for this patch. pub fn operation(&self) -> &FileOperation<'a, T> { &self.operation diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs index e0ef4b0e..79d98849 100644 --- a/src/patch_set/parse.rs +++ b/src/patch_set/parse.rs @@ -1,11 +1,12 @@ //! Parse multiple file patches from a unified diff. use super::{ - error::PatchSetParseErrorKind, FileOperation, FilePatch, Format, ParseOptions, + error::PatchSetParseErrorKind, FileMode, FileOperation, FilePatch, Format, ParseOptions, PatchSetParseError, }; use crate::patch::parse::parse_one; -use crate::utils::Text; +use crate::utils::{escaped_filename, Text}; +use crate::Patch; use std::borrow::Cow; @@ -112,6 +113,7 @@ fn next_patch<'a, T: Text + ?Sized>( let result = match ps.opts.format { Format::UniDiff => next_unidiff_patch(ps), + Format::GitDiff => next_gitdiff_patch(ps), }; if result.is_none() { @@ -186,6 +188,10 @@ fn find_patch_start(input: &T) -> Option { /// /// Returns the content after the first `\n---\n` separator. /// +/// TODO: This only handles a single email message. For mbox streams +/// (concatenation of multiple `git format-patch` outputs), we would need +/// to detect subsequent `From ` lines and split accordingly. 
+/// /// ## Observed git behavior /// /// `git mailinfo` (used by `git am`) uses the first `---` line @@ -212,6 +218,509 @@ fn strip_email_preamble(input: &T) -> &T { } } +fn next_gitdiff_patch<'a, T: Text + ?Sized>( + ps: &mut PatchSet<'a, T>, +) -> Option, PatchSetParseError>> { + let patch_start = find_gitdiff_start(remaining(ps))?; + ps.offset += patch_start; + ps.found_any = true; + + let abs_patch_start = ps.offset; + + // Parse extended headers incrementally — stops at first unrecognized line + let (header, header_consumed) = GitHeader::parse(remaining(ps)); + ps.offset += header_consumed; + + // Handle binary markers ("Binary files ... differ") and binary patches ("GIT binary patch") + if header.is_binary_marker || header.is_binary_patch { + let operation = match extract_file_op_binary(&header, abs_patch_start) { + Ok(op) => op, + Err(e) => return Some(Err(e)), + }; + let (old_mode, new_mode) = match parse_file_modes(&header) { + Ok(modes) => modes, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + return Some(Ok(FilePatch::new_binary(operation, old_mode, new_mode))); + } + + // `git diff` output format is stricter. + // There is no preamble between Git headers and unidiff patch portion, + // so we safely don't perform the preamble skipping. + // + // If we did, it would fail the pure rename/mode-change operation + // since those ops have no unidiff patch portion + // and is directly followed by the next `diff --git` header. 
+ let opts = crate::patch::parse::ParseOpts::default().no_skip_preamble(); + let (result, consumed) = parse_one(remaining(ps), opts); + ps.offset += consumed; + let patch = match result { + Ok(patch) => patch, + Err(e) => return Some(Err(e.into())), + }; + + // FIXME: error spans point at `diff --git` line, not the specific offending line + let operation = match extract_file_op_gitdiff(&header, &patch) { + Ok(op) => op, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + + // FIXME: error spans point at `diff --git` line, not the specific offending line + let (old_mode, new_mode) = match parse_file_modes(&header) { + Ok(modes) => modes, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + + Some(Ok(FilePatch::new(operation, patch, old_mode, new_mode))) +} + +/// Finds the byte offset of the first `diff --git` line in `input`. +fn find_gitdiff_start(input: &T) -> Option { + let mut offset = 0; + for line in input.lines() { + if line.starts_with("diff --git ") { + return Some(offset); + } + offset += line.len(); + } + None +} + +/// Git extended header metadata. +/// +/// Extracted from lines between `diff --git` and `---` (or end of patch). +/// See [git-diff format documentation](https://git-scm.com/docs/diff-format). +#[derive(Debug)] +struct GitHeader<'a, T: ?Sized> { + /// Raw content after "diff --git " prefix. + /// + /// Only parsed in fallback when `---`/`+++` is absent (mode-only, binary, empty file). + diff_git_line: Option<&'a T>, + /// Source path from `rename from `. + rename_from: Option<&'a T>, + /// Destination path from `rename to `. + rename_to: Option<&'a T>, + /// Source path from `copy from `. + copy_from: Option<&'a T>, + /// Destination path from `copy to `. + copy_to: Option<&'a T>, + /// File mode from `old mode `. + old_mode: Option<&'a T>, + /// File mode from `new mode `. + new_mode: Option<&'a T>, + /// File mode from `new file mode `. 
+ new_file_mode: Option<&'a T>, + /// File mode from `deleted file mode `. + deleted_file_mode: Option<&'a T>, + /// Whether this is a binary diff with no actual patch content. + /// + /// Observed `git diff` output (without `--binary`): + /// + /// ```text + /// diff --git a/image.png b/image.png + /// new file mode 100644 + /// index 0000000..7c4530c + /// Binary files /dev/null and b/image.png differ + /// ``` + is_binary_marker: bool, + /// Whether this is a binary diff with actual patch content. + /// + /// Observed `git diff --binary` output: + /// + /// ```text + /// diff --git a/image.png b/image.png + /// new file mode 100644 + /// index 0000000..7c4530c + /// GIT binary patch + /// literal 67 + /// zcmV-J0KET+... + /// + /// literal 0 + /// KcmV+b0RR6000031 + /// ``` + is_binary_patch: bool, +} + +impl Default for GitHeader<'_, T> { + fn default() -> Self { + Self { + diff_git_line: None, + rename_from: None, + rename_to: None, + copy_from: None, + copy_to: None, + old_mode: None, + new_mode: None, + new_file_mode: None, + deleted_file_mode: None, + is_binary_marker: false, + is_binary_patch: false, + } + } +} + +impl<'a, T: Text + ?Sized> GitHeader<'a, T> { + /// Parses git extended headers incrementally from the current position. + /// + /// Consumes the `diff --git` line and all recognized extended header lines, + /// stopping at the first unrecognized line (typically `---`/`+++`/`@@` + /// or the next `diff --git`). + /// + /// Returns the parsed header and the number of bytes consumed. + fn parse(input: &'a T) -> (Self, usize) { + let mut header = GitHeader::default(); + let mut consumed = 0; + + for line in input.lines() { + let trimmed = strip_line_ending(line); + + if let Some(rest) = trimmed.strip_prefix("diff --git ") { + // Only accept the first `diff --git` line. + // A second one means we've reached the next patch. 
+ if header.diff_git_line.is_some() { + break; + } + header.diff_git_line = Some(rest); + } else if let Some(path) = trimmed.strip_prefix("rename from ") { + header.rename_from = Some(path); + } else if let Some(path) = trimmed.strip_prefix("rename to ") { + header.rename_to = Some(path); + } else if let Some(path) = trimmed.strip_prefix("copy from ") { + header.copy_from = Some(path); + } else if let Some(path) = trimmed.strip_prefix("copy to ") { + header.copy_to = Some(path); + } else if let Some(mode) = trimmed.strip_prefix("old mode ") { + header.old_mode = Some(mode); + } else if let Some(mode) = trimmed.strip_prefix("new mode ") { + header.new_mode = Some(mode); + } else if let Some(mode) = trimmed.strip_prefix("new file mode ") { + header.new_file_mode = Some(mode); + } else if let Some(mode) = trimmed.strip_prefix("deleted file mode ") { + header.deleted_file_mode = Some(mode); + } else if trimmed.starts_with("index ") + || trimmed.starts_with("similarity index ") + || trimmed.starts_with("dissimilarity index ") + { + // Recognized but nothing to extract. + } else if trimmed.starts_with("Binary files ") { + header.is_binary_marker = true; + } else if trimmed.starts_with("GIT binary patch") { + header.is_binary_patch = true; + } else { + // Unrecognized line: End of extended headers + // (typically `---`/`+++`/`@@` or trailing content). + break; + } + + consumed += line.len(); + } + + (header, consumed) + } +} + +/// Determines the file operation from git headers and patch paths. +fn extract_file_op_gitdiff<'a, T: Text + ?Sized>( + header: &GitHeader<'a, T>, + patch: &Patch<'a, T>, +) -> Result, PatchSetParseError> { + // Git headers are authoritative for rename/copy. + // Paths may be quoted (e.g., `rename from "foo\tbar.txt"`). 
+ if let (Some(from), Some(to)) = (header.rename_from, header.rename_to) { + return Ok(FileOperation::Rename { + from: escaped_filename(from)?, + to: escaped_filename(to)?, + }); + } + if let (Some(from), Some(to)) = (header.copy_from, header.copy_to) { + return Ok(FileOperation::Copy { + from: escaped_filename(from)?, + to: escaped_filename(to)?, + }); + } + + // Try ---/+++ paths first + if patch.original().is_some() || patch.modified().is_some() { + return extract_file_op_unidiff(patch.original_path(), patch.modified_path()); + } + + // Fall back to `diff --git ` for mode-only and empty file changes + let Some((original, modified)) = header.diff_git_line.and_then(parse_diff_git_path) else { + return Err(PatchSetParseErrorKind::InvalidDiffGitPath.into()); + }; + + if header.new_file_mode.is_some() { + Ok(FileOperation::Create(modified)) + } else if header.deleted_file_mode.is_some() { + Ok(FileOperation::Delete(original)) + } else { + Ok(FileOperation::Modify { original, modified }) + } +} + +/// Parses file modes from git extended headers. +fn parse_file_modes( + header: &GitHeader<'_, T>, +) -> Result<(Option, Option), PatchSetParseError> { + let parse_mode = |mode: &T| -> Result { + mode.as_str() + .ok_or_else(|| { + let s = String::from_utf8_lossy(mode.as_bytes()).into_owned(); + PatchSetParseErrorKind::InvalidFileMode(s) + })? + .parse::() + }; + let old_mode = header + .old_mode + .or(header.deleted_file_mode) + .map(parse_mode) + .transpose()?; + let new_mode = header + .new_mode + .or(header.new_file_mode) + .map(parse_mode) + .transpose()?; + Ok((old_mode, new_mode)) +} + +/// Extracts both old and new paths from `diff --git` line content. +/// +/// ## Assumption #1: old and new paths are the same +/// +/// This extraction has one strong assumption: +/// Beside their prefixes, old and new paths are the same. +/// +/// From [git-diff format documentation]: +/// +/// > The `a/` and `b/` filenames are the same unless rename/copy is involved. 
+/// > Especially, even for a creation or a deletion, `/dev/null` is not used +/// > in place of the `a/` or `b/` filenames. +/// > +/// > When a rename/copy is involved, file1 and file2 show the name of the +/// > source file of the rename/copy and the name of the file that the +/// > rename/copy produces, respectively. +/// +/// Rename/copy operations are resolved earlier from the `rename from/to` and +/// `copy from/to` headers (see [`extract_file_op_gitdiff`]), +/// which have no `a/`/`b/` prefix per git spec. +/// +/// Therefore, this extraction is only used +/// * when unified diff headers (`---`/`+++`) are absent, +/// * i.e., for mode-only, binary, and empty file cases. +/// +/// [git-diff format documentation]: https://git-scm.com/docs/diff-format +/// +/// ## Assumption #2: the longest common path suffix is the shared path +/// +/// When custom prefixes contain spaces, +/// multiple splits may produce valid path suffixes. +/// +/// Example: `src/foo.rs src/foo.rs src/foo.rs src/foo.rs` +/// +/// Three splits all produce valid path suffixes (contain `/`): +/// +/// * Position 10 +/// * old path: `src/foo.rs` +/// * new path: `src/foo.rs src/foo.rs src/foo.rs` +/// * common suffix: `foo.rs` +/// * Position 21 +/// * old path: `src/foo.rs src/foo.rs` +/// * new path: `src/foo.rs src/foo.rs` +/// * common suffix: `src/foo.rs src/foo.rs` (paths are identical) +/// * Position 32 +/// * old path: `src/foo.rs src/foo.rs src/foo.rs` +/// * new path: `src/foo.rs` +/// * common suffix: `foo.rs` +/// +/// We observed that `git apply` would pick position 21, +/// which has the longest path suffix, +/// hence this heuristic. 
+/// +/// ## Supported formats +/// +/// * `a/<path> b/<path>` (default prefix) +/// * `<path> <path>` (`git diff --no-prefix`) +/// * `<prefix><path> <prefix><path>` (custom prefix) +/// * `"<prefix><path>" "<prefix><path>"` (quoted, with escapes) +/// * Mixed quoted/unquoted +fn parse_diff_git_path<'a, T: Text + ?Sized>(line: &'a T) -> Option<(Cow<'a, T>, Cow<'a, T>)> { + if line.starts_with("\"") || line.ends_with("\"") { + parse_quoted_diff_git_path(line) + } else { + parse_unquoted_diff_git_path(line) + } +} + +/// See [`parse_diff_git_path`]. +fn parse_unquoted_diff_git_path<'a, T: Text + ?Sized>( + line: &'a T, +) -> Option<(Cow<'a, T>, Cow<'a, T>)> { + let bytes = line.as_bytes(); + let mut best_match = None; + let mut longest_path_len = 0; + + for (i, _) in bytes.iter().enumerate().filter(|(_, &b)| b == b' ') { + let (left, right_with_space) = line.split_at(i); + // skip the space + let (_, right) = right_with_space.split_at(1); + if left.is_empty() || right.is_empty() { + continue; + } + // Select split with longest common path suffix. + // On ties (`>` not `>=`), the first (leftmost) split wins. + // + // Observed: `git apply` rejects ambiguous splits: + // + // > git diff header lacks filename information + // > when removing N leading pathname component(s) + // + // Also in the git-diff format documentation: + // + // > The a/ and b/ filenames are the same unless rename/copy is involved. + // + // This suggests git-apply's path resolution is strip-level-aware, + // unlike ours. + // + // See `fail_ambiguous_suffix_tie` compat test. + if let Some(path) = longest_common_path_suffix(left, right) { + if path.len() > longest_path_len { + longest_path_len = path.len(); + best_match = Some((left, right)); + } + } + } + + best_match.map(|(l, r)| (Cow::Borrowed(l), Cow::Borrowed(r))) +} + +/// See [`parse_diff_git_path`]. +fn parse_quoted_diff_git_path<'a, T: Text + ?Sized>( + line: &'a T, +) -> Option<(Cow<'a, T>, Cow<'a, T>)> { + let (left_raw, right_raw) = if line.starts_with("\"") { + // First token is quoted. 
+ let bytes = line.as_bytes(); + let mut i = 1; // skip starting `"` + + // Find the closing `"`. + // The only escape where literal `"` appears right after `\` is `\"`, + // an octal double quote `\042` has 3 digits. + // So, `i += 2` correctly skips past `"` and octal digits. + let end = loop { + match bytes.get(i)? { + b'"' => break i + 1, + b'\\' => i += 2, + _ => i += 1, + } + }; + let (first, rest) = line.split_at(end); + let rest = rest.strip_prefix(" ")?; + (first, rest) + } else if let Some(pos) = line.find(" \"") { + // First token is unquoted. The second must be quoted. + let (left, rest) = line.split_at(pos); + let (_, right) = rest.split_at(1); // skip the space + (left, right) + } else { + // Malformed: ends with `"` but no valid quoted path found + return None; + }; + + let left = escaped_filename(left_raw).ok()?; + let right = escaped_filename(right_raw).ok()?; + + // Verify both sides share the same path. + longest_common_path_suffix(left.as_ref(), right.as_ref())?; + Some((left, right)) +} + +/// Extracts the longest common path suffix shared by `a` and `b`. +/// +/// Returns `None` if no valid common path exists. +/// +/// * If both strings are identical, returns the whole string +/// (e.g., `file.rs` vs `file.rs` → `file.rs`). +/// * Otherwise, returns the portion after the first `/` in the common suffix +/// (e.g., `foo/bar.rs` vs `fooo/bar.rs` → `bar.rs`). 
+fn longest_common_path_suffix<'a, T: Text + ?Sized>(a: &'a T, b: &T) -> Option<&'a T> { + if a.is_empty() || b.is_empty() { + return None; + } + + let suffix_len = a + .as_bytes() + .iter() + .rev() + .zip(b.as_bytes().iter().rev()) + .take_while(|(x, y)| x == y) + .count(); + + if suffix_len == 0 { + return None; + } + + // Identical strings + if suffix_len == a.len() && a.len() == b.len() { + return Some(a); + } + + // Find first '/' in suffix and return path after it + let suffix_start = a.len() - suffix_len; + let (_, suffix) = a.split_at(suffix_start); + suffix + .split_at_exclusive("/") + .map(|(_, path)| path) + .filter(|p| !p.is_empty()) +} + +/// Extracts the file operation for a binary patch from git headers. +/// +/// Binary patches have no `---`/`+++` headers, so paths come from the +/// `diff --git` line or rename/copy headers. +fn extract_file_op_binary<'a, T: Text + ?Sized>( + header: &GitHeader<'a, T>, + abs_patch_start: usize, +) -> Result, PatchSetParseError> { + // Git headers are authoritative for rename/copy. + // Paths may be quoted (e.g., `rename from "foo\tbar.txt"`). 
+ if let (Some(from), Some(to)) = (header.rename_from, header.rename_to) { + return Ok(FileOperation::Rename { + from: escaped_filename(from)?, + to: escaped_filename(to)?, + }); + } + if let (Some(from), Some(to)) = (header.copy_from, header.copy_to) { + return Ok(FileOperation::Copy { + from: escaped_filename(from)?, + to: escaped_filename(to)?, + }); + } + + let Some((original, modified)) = header.diff_git_line.and_then(parse_diff_git_path) else { + return Err(PatchSetParseError::new( + PatchSetParseErrorKind::InvalidDiffGitPath, + abs_patch_start..abs_patch_start, + )); + }; + + if header.new_file_mode.is_some() { + Ok(FileOperation::Create(modified)) + } else if header.deleted_file_mode.is_some() { + Ok(FileOperation::Delete(original)) + } else { + Ok(FileOperation::Modify { original, modified }) + } +} + /// Extracts the file operation from a patch based on its header paths. fn extract_file_op_unidiff<'a, T: Text + ?Sized>( original: Option<&Cow<'a, T>>, @@ -257,3 +766,13 @@ fn extract_file_op_unidiff<'a, T: Text + ?Sized>( } } } + +/// Strips the trailing `\n` from a line yielded by [`Text::lines`]. +/// +/// [`Text::lines`] includes line endings; strip for matching. +fn strip_line_ending(line: &T) -> &T { + // TODO: GNU patch strips trailing CRs from CRLF patches automatically. + // We should consider adding compat tests for GNU patch. + // And `git apply` seems to reject. Worth adding tests as well. + line.strip_suffix("\n").unwrap_or(line) +} diff --git a/src/patch_set/tests.rs b/src/patch_set/tests.rs index 52f1c01f..ee7f75bb 100644 --- a/src/patch_set/tests.rs +++ b/src/patch_set/tests.rs @@ -1,6 +1,6 @@ //! Tests for patchset parsing. 
-use super::{error::PatchSetParseErrorKind, FileOperation, ParseOptions, PatchKind, PatchSet}; +use super::{error::PatchSetParseErrorKind, FileOperation, ParseOptions, PatchSet}; mod file_operation { use super::*; @@ -464,6 +464,313 @@ In a hole in the ground there lived a hobbit } } +mod patchset_gitdiff { + use super::*; + fn parse_gitdiff(input: &str) -> Vec> { + PatchSet::parse(input, ParseOptions::gitdiff()) + .collect::, _>>() + .unwrap() + } + + /// `parse_one` must stop at `diff --git` boundaries so that + /// back-to-back patches are split correctly. + /// Without this, the second patch's `diff --git` line would be + /// swallowed as trailing junk by the first patch's hunk parser. + #[test] + fn multi_file_stops_at_diff_git_boundary() { + let input = "\ +diff --git a/foo b/foo +--- a/foo ++++ b/foo +@@ -1 +1 @@ +-old foo ++new foo +diff --git a/bar b/bar +--- a/bar ++++ b/bar +@@ -1 +1 @@ +-old bar ++new bar +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 2); + } + + #[test] + fn pure_rename() { + let input = "\ +diff --git a/old.rs b/new.rs +similarity index 100% +rename from old.rs +rename to new.rs +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Rename { + from: "old.rs".into(), + to: "new.rs".into(), + } + ); + } + + /// Empty file creation has no ---/+++ headers, so the path comes + /// from the `diff --git` line and retains the `b/` prefix. + /// Callers use `strip_prefix(1)` to remove it. 
+ #[test] + fn new_empty_file() { + let input = "\ +diff --git a/empty b/empty +new file mode 100644 +index 0000000..e69de29 +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Create("b/empty".into()) + ); + let p = patches[0].patch().as_text().unwrap(); + assert!(p.hunks().is_empty()); + } + + #[test] + fn rename_then_modify() { + // Rename with no hunks followed by a modify with hunks. + // Tests that offset advances correctly across both. + let input = "\ +diff --git a/old.rs b/new.rs +similarity index 100% +rename from old.rs +rename to new.rs +diff --git a/foo b/foo +--- a/foo ++++ b/foo +@@ -1 +1 @@ +-old ++new +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 2); + assert!(matches!( + patches[0].operation(), + FileOperation::Rename { .. } + )); + assert!(matches!( + patches[1].operation(), + FileOperation::Modify { .. } + )); + } + + /// Quoted path containing an escaped quote (`\"`). + /// Git produces this for filenames with literal double quotes. + /// + /// Observed with git 2.53.0: + /// $ printf 'x' > 'with"quote' && git add -A + /// $ git diff --cached | head -1 + /// diff --git "a/with\"quote" "b/with\"quote" + #[test] + fn path_quoted_with_escaped_quote() { + let input = "\ +diff --git \"a/with\\\"quote\" \"b/with\\\"quote\" +--- \"a/with\\\"quote\" ++++ \"b/with\\\"quote\" +@@ -1 +1 @@ +-old ++new +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Modify { + original: "a/with\"quote".to_owned().into(), + modified: "b/with\"quote".to_owned().into(), + } + ); + } + + /// Copy operation extracted from git extended headers. 
+ #[test] + fn copy_operation() { + let input = "\ +diff --git a/original.rs b/copied.rs +similarity index 100% +copy from original.rs +copy to copied.rs +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Copy { + from: "original.rs".into(), + to: "copied.rs".into(), + } + ); + } + + /// Rename with both paths quoted (escapes in both). + #[test] + fn rename_both_quoted() { + let input = "\ +diff --git \"a/foo\\tbar.rs\" \"b/baz\\tqux.rs\" +similarity index 100% +rename from \"foo\\tbar.rs\" +rename to \"baz\\tqux.rs\" +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Rename { + from: "foo\tbar.rs".into(), + to: "baz\tqux.rs".into(), + } + ); + } + + /// Rename from quoted (has escape) to unquoted (plain). + #[test] + fn rename_quoted_to_unquoted() { + let input = "\ +diff --git \"a/foo\\tbar.rs\" b/normal.rs +similarity index 100% +rename from \"foo\\tbar.rs\" +rename to normal.rs +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Rename { + from: "foo\tbar.rs".into(), + to: "normal.rs".into(), + } + ); + } + + /// Rename from unquoted to quoted (has escape). + #[test] + fn rename_unquoted_to_quoted() { + let input = "\ +diff --git a/normal.rs \"b/foo\\tbar.rs\" +similarity index 100% +rename from normal.rs +rename to \"foo\\tbar.rs\" +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Rename { + from: "normal.rs".into(), + to: "foo\tbar.rs".into(), + } + ); + } + + /// Deleted file: `deleted file mode` header + /dev/null in +++. 
+ #[test] + fn deleted_file_with_mode() { + let input = "\ +diff --git a/gone.rs b/gone.rs +deleted file mode 100644 +index abc1234..0000000 +--- a/gone.rs ++++ /dev/null +@@ -1 +0,0 @@ +-content +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_delete()); + assert_eq!( + patches[0].old_mode(), + Some(&super::super::FileMode::Regular) + ); + } + + /// Mode-only change: no hunks, no ---/+++ headers. + /// File operation falls back to `diff --git` line paths. + #[test] + fn mode_only_change() { + let input = "\ +diff --git a/script.sh b/script.sh +old mode 100644 +new mode 100755 +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_modify()); + assert_eq!( + patches[0].old_mode(), + Some(&super::super::FileMode::Regular), + ); + assert_eq!( + patches[0].new_mode(), + Some(&super::super::FileMode::Executable), + ); + let p = patches[0].patch().as_text().unwrap(); + assert!(p.hunks().is_empty()); + } + + /// New file with content: `new file mode` header + /dev/null in ---. + #[test] + fn new_file_with_content() { + let input = "\ +diff --git a/new.rs b/new.rs +new file mode 100644 +index 0000000..abc1234 +--- /dev/null ++++ b/new.rs +@@ -0,0 +1 @@ ++hello +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_create()); + assert_eq!( + patches[0].new_mode(), + Some(&super::super::FileMode::Regular), + ); + } + + /// `diff --git` line with no-prefix paths (`git diff --no-prefix`). + /// Fallback path parsing works when ---/+++ are absent. 
+ #[test] + fn no_prefix_empty_file() { + let input = "\ +diff --git file.rs file.rs +new file mode 100644 +index 0000000..e69de29 +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_create()); + } + + #[test] + fn binary_emits_marker() { + let input = "\ +diff --git a/img.png b/img.png +Binary files a/img.png and b/img.png differ +diff --git a/foo b/foo +--- a/foo ++++ b/foo +@@ -1 +1 @@ +-old ++new +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 2); + assert!(patches[0].patch().is_binary()); + assert!(patches[0].operation().is_modify()); + assert!(!patches[1].patch().is_binary()); + } +} + mod patchset_unidiff_bytes { use super::*; use crate::patch::Line; @@ -502,7 +809,7 @@ mod patchset_unidiff_bytes { .unwrap(); assert_eq!(patches.len(), 1); - let PatchKind::Text(patch) = patches[0].patch(); + let patch = patches[0].patch().as_text().unwrap(); let lines = patch.hunks()[0].lines(); assert_eq!(lines[0], Line::Delete(b"old\x89PNG\n".as_slice())); assert_eq!(lines[1], Line::Insert(b"new\x89PNG\n".as_slice())); diff --git a/tests/compat/common.rs b/tests/compat/common.rs index 60cd2e20..0993e2a7 100644 --- a/tests/compat/common.rs +++ b/tests/compat/common.rs @@ -2,16 +2,27 @@ use std::{ fs, + io::Write, path::{Path, PathBuf}, - process::Command, + process::{Command, Stdio}, sync::Once, }; use diffy::patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet, PatchSetParseError}; +/// Which external tool to compare against. +#[derive(Clone, Copy)] +pub enum CompatMode { + /// `git apply` with `ParseOptions::gitdiff()` + Git, + /// GNU `patch` with `ParseOptions::unidiff()` + GnuPatch, +} + /// A test case with fluent builder API. 
pub struct Case<'a> { case_name: &'a str, + mode: CompatMode, /// Strip level for path prefixes (default: 0) strip_level: u32, /// Whether diffy is expected to succeed (default: true) @@ -21,20 +32,37 @@ pub struct Case<'a> { } impl<'a> Case<'a> { + /// Create a test case for `git apply` comparison. + pub fn git(name: &'a str) -> Self { + Self { + case_name: name, + mode: CompatMode::Git, + strip_level: 0, + expect_success: true, + expect_compat: true, + } + } + /// Create a test case for GNU patch comparison. pub fn gnu_patch(name: &'a str) -> Self { Self { case_name: name, + mode: CompatMode::GnuPatch, strip_level: 0, expect_success: true, expect_compat: true, } } - /// Get the case directory path. + /// Get the case directory path based on mode. fn case_dir(&self) -> PathBuf { + let subdir = match self.mode { + CompatMode::Git => "git", + CompatMode::GnuPatch => "gnu_patch", + }; PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("tests/compat/gnu_patch") + .join("tests/compat") + .join(subdir) .join(self.case_name) } @@ -62,12 +90,19 @@ impl<'a> Case<'a> { .unwrap_or_else(|e| panic!("failed to read {}: {e}", patch_path.display())); let case_name = self.case_name; + let prefix = match self.mode { + CompatMode::Git => "git", + CompatMode::GnuPatch => "gnu", + }; let temp_base = temp_base(); - let diffy_output = temp_base.join(format!("gnu-{case_name}-diffy")); + let diffy_output = temp_base.join(format!("{prefix}-{case_name}-diffy")); create_output_dir(&diffy_output); - let opts = ParseOptions::unidiff(); + let opts = match self.mode { + CompatMode::Git => ParseOptions::gitdiff(), + CompatMode::GnuPatch => ParseOptions::unidiff(), + }; // Apply with diffy let diffy_result = apply_diffy(&in_dir, &patch, &diffy_output, opts, self.strip_level); @@ -81,12 +116,19 @@ impl<'a> Case<'a> { // In CI mode, also verify external tool behavior if is_ci() { - let external_output = temp_base.join(format!("gnu-{case_name}-external")); + let external_output = 
temp_base.join(format!("{prefix}-{case_name}-external")); create_output_dir(&external_output); - print_patch_version(); - let external_result = - gnu_patch_apply(&in_dir, &patch_path, &external_output, self.strip_level); + let external_result = match self.mode { + CompatMode::Git => { + print_git_version(); + git_apply(&external_output, &patch, self.strip_level, &in_dir) + } + CompatMode::GnuPatch => { + print_patch_version(); + gnu_patch_apply(&in_dir, &patch_path, &external_output, self.strip_level) + } + }; // For success cases where both succeed and are expected to be compatible, // verify outputs match @@ -149,6 +191,52 @@ fn gnu_patch_apply( } } +fn git_apply( + output_dir: &Path, + patch: &[u8], + strip_level: u32, + in_dir: &Path, +) -> Result<(), String> { + copy_input_files(in_dir, output_dir, &["patch"]); + + let mut cmd = Command::new("git"); + cmd.env("GIT_CONFIG_NOSYSTEM", "1"); + cmd.env("GIT_CONFIG_GLOBAL", "/dev/null"); + cmd.current_dir(output_dir); + cmd.args(["apply", &format!("-p{strip_level}"), "-"]); + cmd.stdin(Stdio::piped()); + cmd.stdout(Stdio::piped()); + cmd.stderr(Stdio::piped()); + + let mut child = cmd.spawn().expect("failed to spawn git apply"); + child.stdin.as_mut().unwrap().write_all(patch).unwrap(); + + let output = child.wait_with_output().unwrap(); + if output.status.success() { + Ok(()) + } else { + Err(String::from_utf8_lossy(&output.stderr).to_string()) + } +} + +fn print_git_version() { + static ONCE: Once = Once::new(); + ONCE.call_once(|| { + let output = Command::new("git").arg("--version").output(); + match output { + Ok(o) if o.status.success() => { + let version = String::from_utf8_lossy(&o.stdout); + eprintln!( + "git version: {}", + version.lines().next().unwrap_or("unknown") + ); + } + Ok(o) => eprintln!("git --version failed: {}", o.status), + Err(e) => eprintln!("git command not found: {e}"), + } + }); +} + fn print_patch_version() { static ONCE: Once = Once::new(); ONCE.call_once(|| { @@ -172,6 +260,7 @@ fn 
print_patch_version() { pub enum TestError { Parse(PatchSetParseError), Apply(diffy::ApplyError), + Io(std::io::Error), } impl std::fmt::Display for TestError { @@ -179,6 +268,7 @@ impl std::fmt::Display for TestError { match self { TestError::Parse(e) => write!(f, "parse error: {e}"), TestError::Apply(e) => write!(f, "apply error: {e}"), + TestError::Io(e) => write!(f, "io error: {e}"), } } } @@ -273,9 +363,7 @@ pub fn apply_diffy( PatchKind::Text(patch) => { let original = if let Some(name) = original_name { let original_path = in_dir.join(bytes_to_path(name)); - fs::read(&original_path).unwrap_or_else(|e| { - panic!("failed to read {}: {e}", original_path.display()) - }) + fs::read(&original_path).map_err(TestError::Io)? } else { Vec::new() }; @@ -288,6 +376,9 @@ pub fn apply_diffy( } fs::write(&result_path, &result).unwrap(); } + PatchKind::Binary => { + // No patch data to apply — nothing to do. + } } } diff --git a/tests/compat/git/fail_ambiguous_suffix_tie/in/foo.patch b/tests/compat/git/fail_ambiguous_suffix_tie/in/foo.patch new file mode 100644 index 00000000..fe6658e8 --- /dev/null +++ b/tests/compat/git/fail_ambiguous_suffix_tie/in/foo.patch @@ -0,0 +1,3 @@ +diff --git a/x b/x c/x +new file mode 100644 +index 0000000..e69de29 diff --git a/tests/compat/git/fail_ambiguous_suffix_tie/out/x c/x b/tests/compat/git/fail_ambiguous_suffix_tie/out/x c/x new file mode 100644 index 00000000..e69de29b diff --git a/tests/compat/git/fail_both_devnull/in/foo.patch b/tests/compat/git/fail_both_devnull/in/foo.patch new file mode 100644 index 00000000..26b28273 --- /dev/null +++ b/tests/compat/git/fail_both_devnull/in/foo.patch @@ -0,0 +1,6 @@ +diff --git a/foo b/foo +--- /dev/null ++++ /dev/null +@@ -1 +1 @@ +-old ++new diff --git a/tests/compat/git/fail_prefix_no_slash/in/foo.patch b/tests/compat/git/fail_prefix_no_slash/in/foo.patch new file mode 100644 index 00000000..c6fb4d27 --- /dev/null +++ b/tests/compat/git/fail_prefix_no_slash/in/foo.patch @@ -0,0 +1,13 @@ 
+diff --git fooold.txt barold.txt +index 4083766..40c94fa 100644 +--- fooold.txt ++++ barold.txt +@@ -2,7 +2,7 @@ line1 + line2 + line3 + line4 +-line5 ++changed + line6 + line7 + line8 diff --git a/tests/compat/git/fail_prefix_no_slash/in/old.txt b/tests/compat/git/fail_prefix_no_slash/in/old.txt new file mode 100644 index 00000000..4083766a --- /dev/null +++ b/tests/compat/git/fail_prefix_no_slash/in/old.txt @@ -0,0 +1,10 @@ +line1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 +line10 diff --git a/tests/compat/git/format_patch_diff_in_message/in/file.txt b/tests/compat/git/format_patch_diff_in_message/in/file.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/format_patch_diff_in_message/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/format_patch_diff_in_message/in/foo.patch b/tests/compat/git/format_patch_diff_in_message/in/foo.patch new file mode 100644 index 00000000..ce333713 --- /dev/null +++ b/tests/compat/git/format_patch_diff_in_message/in/foo.patch @@ -0,0 +1,20 @@ +From 8a14b135fe7ba10bab09a77c4a687faaf1d92a26 Mon Sep 17 00:00:00 2001 +From: Test +Date: Wed, 1 Jan 2020 00:00:00 +0000 +Subject: [PATCH] Fix diff --git parsing bug + +The line `diff --git a/x b/x` in messages was incorrectly parsed. 
+--- + file.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/file.txt b/file.txt +index 3367afd..3e75765 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new +-- +2.52.0 + diff --git a/tests/compat/git/format_patch_diff_in_message/out/file.txt b/tests/compat/git/format_patch_diff_in_message/out/file.txt new file mode 100644 index 00000000..3e757656 --- /dev/null +++ b/tests/compat/git/format_patch_diff_in_message/out/file.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/format_patch_mbox/in/a.txt b/tests/compat/git/format_patch_mbox/in/a.txt new file mode 100644 index 00000000..ffc74965 --- /dev/null +++ b/tests/compat/git/format_patch_mbox/in/a.txt @@ -0,0 +1 @@ +old a diff --git a/tests/compat/git/format_patch_mbox/in/b.txt b/tests/compat/git/format_patch_mbox/in/b.txt new file mode 100644 index 00000000..568e6192 --- /dev/null +++ b/tests/compat/git/format_patch_mbox/in/b.txt @@ -0,0 +1 @@ +old b diff --git a/tests/compat/git/format_patch_mbox/in/c.txt b/tests/compat/git/format_patch_mbox/in/c.txt new file mode 100644 index 00000000..89cb37d7 --- /dev/null +++ b/tests/compat/git/format_patch_mbox/in/c.txt @@ -0,0 +1 @@ +old c diff --git a/tests/compat/git/format_patch_mbox/in/foo.patch b/tests/compat/git/format_patch_mbox/in/foo.patch new file mode 100644 index 00000000..720648ea --- /dev/null +++ b/tests/compat/git/format_patch_mbox/in/foo.patch @@ -0,0 +1,56 @@ +From aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa Mon Sep 17 00:00:00 2001 +From: Alice +Date: Mon, 1 Jan 2024 00:00:00 +0000 +Subject: [PATCH 1/3] first patch + +First commit message. +--- + a.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/a.txt b/a.txt +--- a/a.txt ++++ b/a.txt +@@ -1 +1 @@ +-old a ++new a +-- +2.40.0 + +From bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb Mon Sep 17 00:00:00 2001 +From: Bob +Date: Mon, 1 Jan 2024 00:00:01 +0000 +Subject: [PATCH 2/3] second patch + +Second commit message. 
+--- + b.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/b.txt b/b.txt +--- a/b.txt ++++ b/b.txt +@@ -1 +1 @@ +-old b ++new b +-- +2.40.0 + +From cccccccccccccccccccccccccccccccccccccccc Mon Sep 17 00:00:00 2001 +From: Carol +Date: Mon, 1 Jan 2024 00:00:02 +0000 +Subject: [PATCH 3/3] third patch + +Third commit message. +--- + c.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/c.txt b/c.txt +--- a/c.txt ++++ b/c.txt +@@ -1 +1 @@ +-old c ++new c +-- +2.40.0 diff --git a/tests/compat/git/format_patch_mbox/out/a.txt b/tests/compat/git/format_patch_mbox/out/a.txt new file mode 100644 index 00000000..56db76ce --- /dev/null +++ b/tests/compat/git/format_patch_mbox/out/a.txt @@ -0,0 +1 @@ +new a diff --git a/tests/compat/git/format_patch_mbox/out/b.txt b/tests/compat/git/format_patch_mbox/out/b.txt new file mode 100644 index 00000000..81e3be46 --- /dev/null +++ b/tests/compat/git/format_patch_mbox/out/b.txt @@ -0,0 +1 @@ +new b diff --git a/tests/compat/git/format_patch_mbox/out/c.txt b/tests/compat/git/format_patch_mbox/out/c.txt new file mode 100644 index 00000000..8ed67817 --- /dev/null +++ b/tests/compat/git/format_patch_mbox/out/c.txt @@ -0,0 +1 @@ +new c diff --git a/tests/compat/git/format_patch_multiple_separators/in/file.txt b/tests/compat/git/format_patch_multiple_separators/in/file.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/format_patch_multiple_separators/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/format_patch_multiple_separators/in/foo.patch b/tests/compat/git/format_patch_multiple_separators/in/foo.patch new file mode 100644 index 00000000..a0da5b94 --- /dev/null +++ b/tests/compat/git/format_patch_multiple_separators/in/foo.patch @@ -0,0 +1,21 @@ +From 6bfbbfa49a16bb8173145a933fe5ad918ad48a31 Mon Sep 17 00:00:00 2001 +From: Test +Date: Wed, 1 Jan 2020 00:00:00 +0000 +Subject: [PATCH] Add content with --- markers + +--- + file.txt | 4 +++- + 1 
file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/file.txt b/file.txt +index 3367afd..c4d4ea8 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1,3 @@ +-old ++--- ++new ++--- +-- +2.52.0 + diff --git a/tests/compat/git/format_patch_multiple_separators/out/file.txt b/tests/compat/git/format_patch_multiple_separators/out/file.txt new file mode 100644 index 00000000..c4d4ea8d --- /dev/null +++ b/tests/compat/git/format_patch_multiple_separators/out/file.txt @@ -0,0 +1,3 @@ +--- +new +--- diff --git a/tests/compat/git/format_patch_preamble/in/file.txt b/tests/compat/git/format_patch_preamble/in/file.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/format_patch_preamble/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/format_patch_preamble/in/foo.patch b/tests/compat/git/format_patch_preamble/in/foo.patch new file mode 100644 index 00000000..9d904250 --- /dev/null +++ b/tests/compat/git/format_patch_preamble/in/foo.patch @@ -0,0 +1,20 @@ +From ddbc9053359329dd016ed89f0d6e460b3b8ff5e3 Mon Sep 17 00:00:00 2001 +From: Test +Date: Wed, 1 Jan 2020 00:00:00 +0000 +Subject: [PATCH] Add new content + +This is the commit body. 
+--- + file.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/file.txt b/file.txt +index 3367afd..3e75765 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new +-- +2.52.0 + diff --git a/tests/compat/git/format_patch_preamble/out/file.txt b/tests/compat/git/format_patch_preamble/out/file.txt new file mode 100644 index 00000000..3e757656 --- /dev/null +++ b/tests/compat/git/format_patch_preamble/out/file.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/format_patch_signature/in/file.txt b/tests/compat/git/format_patch_signature/in/file.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/format_patch_signature/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/format_patch_signature/in/foo.patch b/tests/compat/git/format_patch_signature/in/foo.patch new file mode 100644 index 00000000..7614287a --- /dev/null +++ b/tests/compat/git/format_patch_signature/in/foo.patch @@ -0,0 +1,19 @@ +From b3bb3125eff3d2648f15af2a6e0cdcdf6ad8fce1 Mon Sep 17 00:00:00 2001 +From: Test +Date: Wed, 1 Jan 2020 00:00:00 +0000 +Subject: [PATCH] modify + +--- + file.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/file.txt b/file.txt +index 3367afd..3e75765 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new +-- +2.52.0 + diff --git a/tests/compat/git/format_patch_signature/out/file.txt b/tests/compat/git/format_patch_signature/out/file.txt new file mode 100644 index 00000000..3e757656 --- /dev/null +++ b/tests/compat/git/format_patch_signature/out/file.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/junk_between_files/in/bar.txt b/tests/compat/git/junk_between_files/in/bar.txt new file mode 100644 index 00000000..601d8ee1 --- /dev/null +++ b/tests/compat/git/junk_between_files/in/bar.txt @@ -0,0 +1 @@ +bar line1 diff --git a/tests/compat/git/junk_between_files/in/foo.patch b/tests/compat/git/junk_between_files/in/foo.patch new file mode 100644 index 
00000000..dbc59398 --- /dev/null +++ b/tests/compat/git/junk_between_files/in/foo.patch @@ -0,0 +1,17 @@ +diff --git a/foo.txt b/foo.txt +index 1234567..89abcdef 100644 +--- a/foo.txt ++++ b/foo.txt +@@ -1 +1 @@ +-foo line1 ++FOO LINE1 +JUNK BETWEEN FILES!!!! +This preamble text should be ignored +by both git apply and diffy +diff --git a/bar.txt b/bar.txt +index 1234567..89abcdef 100644 +--- a/bar.txt ++++ b/bar.txt +@@ -1 +1 @@ +-bar line1 ++BAR LINE1 diff --git a/tests/compat/git/junk_between_files/in/foo.txt b/tests/compat/git/junk_between_files/in/foo.txt new file mode 100644 index 00000000..b11358e1 --- /dev/null +++ b/tests/compat/git/junk_between_files/in/foo.txt @@ -0,0 +1 @@ +foo line1 diff --git a/tests/compat/git/junk_between_files/out/bar.txt b/tests/compat/git/junk_between_files/out/bar.txt new file mode 100644 index 00000000..76c036d0 --- /dev/null +++ b/tests/compat/git/junk_between_files/out/bar.txt @@ -0,0 +1 @@ +BAR LINE1 diff --git a/tests/compat/git/junk_between_files/out/foo.txt b/tests/compat/git/junk_between_files/out/foo.txt new file mode 100644 index 00000000..787bc665 --- /dev/null +++ b/tests/compat/git/junk_between_files/out/foo.txt @@ -0,0 +1 @@ +FOO LINE1 diff --git a/tests/compat/git/junk_between_hunks/in/file.txt b/tests/compat/git/junk_between_hunks/in/file.txt new file mode 100644 index 00000000..822aed3f --- /dev/null +++ b/tests/compat/git/junk_between_hunks/in/file.txt @@ -0,0 +1,9 @@ +line1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 diff --git a/tests/compat/git/junk_between_hunks/in/foo.patch b/tests/compat/git/junk_between_hunks/in/foo.patch new file mode 100644 index 00000000..242e959c --- /dev/null +++ b/tests/compat/git/junk_between_hunks/in/foo.patch @@ -0,0 +1,15 @@ +diff --git a/file.txt b/file.txt +index 1234567..89abcdef 100644 +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,3 @@ +-line1 ++LINE1 + line2 + line3 +JUNK BETWEEN HUNKS +@@ -7,3 +7,3 @@ + line7 +-line8 ++LINE8 + line9 diff --git 
a/tests/compat/git/junk_between_hunks/out/file.txt b/tests/compat/git/junk_between_hunks/out/file.txt new file mode 100644 index 00000000..2e5e454d --- /dev/null +++ b/tests/compat/git/junk_between_hunks/out/file.txt @@ -0,0 +1,9 @@ +LINE1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 diff --git a/tests/compat/git/mod.rs b/tests/compat/git/mod.rs new file mode 100644 index 00000000..041edaf3 --- /dev/null +++ b/tests/compat/git/mod.rs @@ -0,0 +1,216 @@ +//! Git compatibility tests. See [`crate`] for test structure and usage. +//! +//! Focus areas: +//! +//! - `diff --git` path parsing edge cases (quotes, spaces, ambiguous prefixes) +//! - `git format-patch` email format (preamble/signature stripping) +//! - Agreement between diffy and `git apply` + +use crate::common::Case; + +#[test] +fn path_no_prefix() { + Case::git("path_no_prefix").run(); +} + +#[test] +fn path_quoted_escapes() { + Case::git("path_quoted_escapes").strip(1).run(); +} + +// Git uses C-style named escapes (\a, \b, \f, \v) for certain control +// characters in quoted filenames. Both `git apply` and GNU patch decode +// these correctly. +// +// Observed with git 2.53.0: +// $ printf 'x' > "$(printf 'bel\a')" && git add -A +// $ git diff --cached | grep '+++' +// +++ "b/bel\a" +// +// diffy now decodes these correctly. +#[test] +fn path_quoted_named_escape() { + Case::git("path_quoted_named_escape").strip(1).run(); +} + +// Git uses 3-digit octal escapes (\000-\377) for bytes that don't have +// a named escape. Both `git apply` and GNU patch decode these correctly. +// +// Observed with git 2.53.0: +// $ printf 'x' > "$(printf 'tl\033')" && git add -A +// $ git diff --cached | grep '+++' +// +++ "b/tl\033" +// +// Found via full-history replay test against llvm/llvm-project +// (commits 17af06ba..229c95ab, 6c031780..0683a1e5). 
+#[test] +fn path_quoted_octal_escape() { + Case::git("path_quoted_octal_escape").strip(1).run(); +} + +// Git uses `\"` inside quoted filenames for literal double-quote characters. +// +// Observed with git 2.53.0: +// $ printf 'x' > 'with"quote.txt' && git add -A +// $ git diff --cached | grep '+++' +// +++ "b/with\"quote.txt" +#[test] +fn path_quoted_inner_quote() { + Case::git("path_quoted_inner_quote").strip(1).run(); +} + +#[test] +fn path_with_spaces() { + Case::git("path_with_spaces").strip(1).run(); +} + +#[test] +fn path_containing_space_b() { + Case::git("path_containing_space_b").strip(1).run(); +} + +#[test] +fn format_patch_preamble() { + // Ambiguous: where does preamble end? First `\n---\n` - verify matches git + Case::git("format_patch_preamble").strip(1).run(); +} + +#[test] +fn format_patch_diff_in_message() { + // `diff --git` in commit message must NOT trigger early parsing + Case::git("format_patch_diff_in_message").strip(1).run(); +} + +#[test] +fn format_patch_multiple_separators() { + // Git uses first `\n---\n` as separator (observed git mailinfo behavior) + Case::git("format_patch_multiple_separators").strip(1).run(); +} + +// Mbox stream: three concatenated `git format-patch` emails in one file. +// Each email has full headers, commit message, `---` separator, and signature. +// `git apply` splits on `diff --git` boundaries, ignoring inter-email content. +#[test] +fn format_patch_mbox() { + Case::git("format_patch_mbox").strip(1).run(); +} + +#[test] +fn format_patch_signature() { + // Ambiguous: `\n-- \n` could appear in patch content - verify matches git + Case::git("format_patch_signature").strip(1).run(); +} + +#[test] +fn nested_diff_signature() { + // Patch that deletes a diff file containing `-- ` patterns within its content, + // followed by a real email signature at the end. 
+ // + // Tests that we correctly distinguish between: + // - `-- ` appearing as patch content (from inner diff's empty context lines) + // - `-- ` appearing as the actual email signature separator + // + // Both git apply and GNU patch handle this correctly. + Case::git("nested_diff_signature").strip(1).run(); +} + +#[test] +fn path_ambiguous_suffix() { + // Multiple valid splits in `diff --git` line; algorithm picks longest common suffix. + // Tests the pathological case from parse.rs comments where custom prefix + // creates `src/foo.rs src/foo.rs src/foo.rs src/foo.rs` - verify matches git. + Case::git("path_ambiguous_suffix").strip(1).run(); +} + +// Ambiguous `diff --git` line where two splits produce the same suffix length. +// `diff --git a/x b/x c/x` (from `--dst-prefix='b/x c/'` on file `x`): +// split at 3: a/x vs b/x c/x → suffix `x` (len 1) +// split at 7: a/x b/x vs c/x → suffix `x` (len 1) +// +// - git apply: rejects: "git diff header lacks filename information when +// removing 1 leading pathname component") +// - diffy: succeeds, picks first (leftmost) split +#[test] +fn fail_ambiguous_suffix_tie() { + Case::git("fail_ambiguous_suffix_tie") + .strip(1) + .expect_success(true) + .expect_compat(false) + .run(); +} + +// Both --- and +++ point to /dev/null. +// git apply rejects: "dev/null: No such file or directory" +#[test] +fn fail_both_devnull() { + Case::git("fail_both_devnull") + .strip(1) + .expect_success(false) + .run(); +} + +// Mixed quoted/unquoted paths in `diff --git` line and rename headers. +// +// Rename from a file with tab in its name (quoted) to a normal name (unquoted): +// `diff --git "a/foo\tbar.txt" b/normal.txt` +// `rename from "foo\tbar.txt"` +// `rename to normal.txt` +#[test] +fn path_mixed_quoted() { + Case::git("path_mixed_quoted").strip(1).run(); +} + +// Custom prefix with slash (e.g. `--src-prefix=src/ --dst-prefix=dst/`). +// +// Produces `diff --git src/old.txt dst/old.txt` and matching ---/+++ headers. 
+// Both git apply and diffy handle this correctly with strip(1). +#[test] +fn path_custom_prefix() { + Case::git("path_custom_prefix").strip(1).run(); +} + +// Custom prefix without slash (e.g. `--src-prefix=foo --dst-prefix=bar`). +// +// Produces paths like `fooold.txt` / `barold.txt` with no `/` separator, +// making strip impossible. Both git apply and diffy fail: +// - git apply: "git diff header lacks filename information when removing 1 +// leading pathname component" +// - diffy: paths don't match any input file +#[test] +fn fail_prefix_no_slash() { + Case::git("fail_prefix_no_slash") + .strip(1) + .expect_success(false) + .run(); +} + +// Patch with non-UTF-8 bytes (0x80, 0xff) in hunk content. +// Both git apply and diffy handle raw bytes correctly. +#[test] +fn non_utf8_hunk_content() { + Case::git("non_utf8_hunk_content").strip(1).run(); +} + +// Single-file patch with junk between hunks. +// +// - git apply: errors ("patch fragment without header") +// - diffy: succeeds, ignores trailing junk (matches GNU patch behavior) +#[test] +fn junk_between_hunks() { + Case::git("junk_between_hunks") + .strip(1) + .expect_compat(false) + .run(); +} + +// Multi-file patch with junk/preamble text between different files. +// +// git apply behavior: Ignores content between `diff --git` boundaries. +// In GitDiff mode, splitting occurs at `diff --git`, so junk between +// files becomes trailing content of the previous chunk (harmless). +// +// This is different from junk between HUNKS of the same file (which fails). 
+#[test] +fn junk_between_files() { + Case::git("junk_between_files").strip(1).run(); +} diff --git a/tests/compat/git/nested_diff_signature/in/example.rs b/tests/compat/git/nested_diff_signature/in/example.rs new file mode 100644 index 00000000..8f3b7ef1 --- /dev/null +++ b/tests/compat/git/nested_diff_signature/in/example.rs @@ -0,0 +1 @@ +fn foo() {} diff --git a/tests/compat/git/nested_diff_signature/in/foo.patch b/tests/compat/git/nested_diff_signature/in/foo.patch new file mode 100644 index 00000000..5d876c61 --- /dev/null +++ b/tests/compat/git/nested_diff_signature/in/foo.patch @@ -0,0 +1,25 @@ +diff --git a/mir-test.diff b/mir-test.diff +deleted file mode 100644 +index 98012d7..0000000 +--- a/mir-test.diff ++++ /dev/null +@@ -1,12 +0,0 @@ +-- // MIR before +-+ // MIR after +- +- fn opt() { +- bb0: { +-- nop; +-- } +-- +-- bb1: { +-- nop; +- } +- } +diff --git a/example.rs b/example.rs +index 8f3b7ef..2a40712 100644 +--- a/example.rs ++++ b/example.rs +@@ -1 +1,2 @@ + fn foo() {} ++fn bar() {} diff --git a/tests/compat/git/nested_diff_signature/in/mir-test.diff b/tests/compat/git/nested_diff_signature/in/mir-test.diff new file mode 100644 index 00000000..98012d7e --- /dev/null +++ b/tests/compat/git/nested_diff_signature/in/mir-test.diff @@ -0,0 +1,12 @@ +- // MIR before ++ // MIR after + + fn opt() { + bb0: { +- nop; +- } +- +- bb1: { +- nop; + } + } diff --git a/tests/compat/git/nested_diff_signature/out/example.rs b/tests/compat/git/nested_diff_signature/out/example.rs new file mode 100644 index 00000000..2a40712e --- /dev/null +++ b/tests/compat/git/nested_diff_signature/out/example.rs @@ -0,0 +1,2 @@ +fn foo() {} +fn bar() {} diff --git a/tests/compat/git/nested_diff_signature/out/mir-test.diff b/tests/compat/git/nested_diff_signature/out/mir-test.diff new file mode 100644 index 00000000..e69de29b diff --git a/tests/compat/git/non_utf8_hunk_content/in/file.bin b/tests/compat/git/non_utf8_hunk_content/in/file.bin new file mode 100644 index 
00000000..5ddb01f6 --- /dev/null +++ b/tests/compat/git/non_utf8_hunk_content/in/file.bin @@ -0,0 +1 @@ +hello €ÿ world diff --git a/tests/compat/git/non_utf8_hunk_content/in/foo.patch b/tests/compat/git/non_utf8_hunk_content/in/foo.patch new file mode 100644 index 00000000..0c3f72f2 --- /dev/null +++ b/tests/compat/git/non_utf8_hunk_content/in/foo.patch @@ -0,0 +1,6 @@ +diff --git a/file.bin b/file.bin +--- a/file.bin ++++ b/file.bin +@@ -1 +1 @@ +-hello €ÿ world ++hello €ÿ universe diff --git a/tests/compat/git/non_utf8_hunk_content/out/file.bin b/tests/compat/git/non_utf8_hunk_content/out/file.bin new file mode 100644 index 00000000..715e61f2 --- /dev/null +++ b/tests/compat/git/non_utf8_hunk_content/out/file.bin @@ -0,0 +1 @@ +hello €ÿ universe diff --git a/tests/compat/git/path_ambiguous_suffix/in/foo.patch b/tests/compat/git/path_ambiguous_suffix/in/foo.patch new file mode 100644 index 00000000..a6815eb7 --- /dev/null +++ b/tests/compat/git/path_ambiguous_suffix/in/foo.patch @@ -0,0 +1,3 @@ +diff --git src/foo.rs src/foo.rs src/foo.rs src/foo.rs +new file mode 100644 +index 0000000..e69de29 diff --git a/tests/compat/git/path_ambiguous_suffix/out/foo.rs src/foo.rs b/tests/compat/git/path_ambiguous_suffix/out/foo.rs src/foo.rs new file mode 100644 index 00000000..e69de29b diff --git a/tests/compat/git/path_containing_space_b/in/foo b/baz.txt b/tests/compat/git/path_containing_space_b/in/foo b/baz.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/path_containing_space_b/in/foo b/baz.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/path_containing_space_b/in/foo.patch b/tests/compat/git/path_containing_space_b/in/foo.patch new file mode 100644 index 00000000..15c6fca2 --- /dev/null +++ b/tests/compat/git/path_containing_space_b/in/foo.patch @@ -0,0 +1,7 @@ +diff --git a/foo b/baz.txt b/foo b/baz.txt +index 3367afd..3e75765 100644 +--- a/foo b/baz.txt ++++ b/foo b/baz.txt +@@ -1 +1 @@ +-old ++new diff --git 
a/tests/compat/git/path_containing_space_b/out/foo b/baz.txt b/tests/compat/git/path_containing_space_b/out/foo b/baz.txt new file mode 100644 index 00000000..3e757656 --- /dev/null +++ b/tests/compat/git/path_containing_space_b/out/foo b/baz.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/path_custom_prefix/in/foo.patch b/tests/compat/git/path_custom_prefix/in/foo.patch new file mode 100644 index 00000000..013810f8 --- /dev/null +++ b/tests/compat/git/path_custom_prefix/in/foo.patch @@ -0,0 +1,13 @@ +diff --git src/old.txt dst/old.txt +index 4083766..40c94fa 100644 +--- src/old.txt ++++ dst/old.txt +@@ -2,7 +2,7 @@ line1 + line2 + line3 + line4 +-line5 ++changed + line6 + line7 + line8 diff --git a/tests/compat/git/path_custom_prefix/in/old.txt b/tests/compat/git/path_custom_prefix/in/old.txt new file mode 100644 index 00000000..4083766a --- /dev/null +++ b/tests/compat/git/path_custom_prefix/in/old.txt @@ -0,0 +1,10 @@ +line1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 +line10 diff --git a/tests/compat/git/path_custom_prefix/out/old.txt b/tests/compat/git/path_custom_prefix/out/old.txt new file mode 100644 index 00000000..40c94fa1 --- /dev/null +++ b/tests/compat/git/path_custom_prefix/out/old.txt @@ -0,0 +1,10 @@ +line1 +line2 +line3 +line4 +changed +line6 +line7 +line8 +line9 +line10 diff --git "a/tests/compat/git/path_mixed_quoted/in/foo\tbar.txt" "b/tests/compat/git/path_mixed_quoted/in/foo\tbar.txt" new file mode 100644 index 00000000..4083766a --- /dev/null +++ "b/tests/compat/git/path_mixed_quoted/in/foo\tbar.txt" @@ -0,0 +1,10 @@ +line1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 +line10 diff --git a/tests/compat/git/path_mixed_quoted/in/foo.patch b/tests/compat/git/path_mixed_quoted/in/foo.patch new file mode 100644 index 00000000..23016da4 --- /dev/null +++ b/tests/compat/git/path_mixed_quoted/in/foo.patch @@ -0,0 +1,16 @@ +diff --git "a/foo\tbar.txt" b/normal.txt +similarity index 87% +rename from "foo\tbar.txt" +rename to 
normal.txt +index 4083766..40c94fa 100644 +--- "a/foo\tbar.txt" ++++ b/normal.txt +@@ -2,7 +2,7 @@ line1 + line2 + line3 + line4 +-line5 ++changed + line6 + line7 + line8 diff --git a/tests/compat/git/path_mixed_quoted/out/normal.txt b/tests/compat/git/path_mixed_quoted/out/normal.txt new file mode 100644 index 00000000..40c94fa1 --- /dev/null +++ b/tests/compat/git/path_mixed_quoted/out/normal.txt @@ -0,0 +1,10 @@ +line1 +line2 +line3 +line4 +changed +line6 +line7 +line8 +line9 +line10 diff --git a/tests/compat/git/path_no_prefix/in/file.txt b/tests/compat/git/path_no_prefix/in/file.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/path_no_prefix/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/path_no_prefix/in/foo.patch b/tests/compat/git/path_no_prefix/in/foo.patch new file mode 100644 index 00000000..5e2a9f8f --- /dev/null +++ b/tests/compat/git/path_no_prefix/in/foo.patch @@ -0,0 +1,7 @@ +diff --git file.txt file.txt +index 3367afd..3e75765 100644 +--- file.txt ++++ file.txt +@@ -1 +1 @@ +-old ++new diff --git a/tests/compat/git/path_no_prefix/out/file.txt b/tests/compat/git/path_no_prefix/out/file.txt new file mode 100644 index 00000000..3e757656 --- /dev/null +++ b/tests/compat/git/path_no_prefix/out/file.txt @@ -0,0 +1 @@ +new diff --git "a/tests/compat/git/path_quoted_escapes/in/foo\tbar.txt" "b/tests/compat/git/path_quoted_escapes/in/foo\tbar.txt" new file mode 100644 index 00000000..3367afdb --- /dev/null +++ "b/tests/compat/git/path_quoted_escapes/in/foo\tbar.txt" @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/path_quoted_escapes/in/foo.patch b/tests/compat/git/path_quoted_escapes/in/foo.patch new file mode 100644 index 00000000..26692238 --- /dev/null +++ b/tests/compat/git/path_quoted_escapes/in/foo.patch @@ -0,0 +1,7 @@ +diff --git "a/foo\tbar.txt" "b/foo\tbar.txt" +index 3367afd..3e75765 100644 +--- "a/foo\tbar.txt" ++++ "b/foo\tbar.txt" +@@ -1 +1 @@ +-old ++new diff --git 
"a/tests/compat/git/path_quoted_escapes/out/foo\tbar.txt" "b/tests/compat/git/path_quoted_escapes/out/foo\tbar.txt" new file mode 100644 index 00000000..3e757656 --- /dev/null +++ "b/tests/compat/git/path_quoted_escapes/out/foo\tbar.txt" @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/path_quoted_inner_quote/in/foo.patch b/tests/compat/git/path_quoted_inner_quote/in/foo.patch new file mode 100644 index 00000000..473251d9 --- /dev/null +++ b/tests/compat/git/path_quoted_inner_quote/in/foo.patch @@ -0,0 +1,7 @@ +diff --git "a/with\"quote.txt" "b/with\"quote.txt" +index 3367afd..3e75765 100644 +--- "a/with\"quote.txt" ++++ "b/with\"quote.txt" +@@ -1 +1 @@ +-old ++new diff --git "a/tests/compat/git/path_quoted_inner_quote/in/with\"quote.txt" "b/tests/compat/git/path_quoted_inner_quote/in/with\"quote.txt" new file mode 100644 index 00000000..3367afdb --- /dev/null +++ "b/tests/compat/git/path_quoted_inner_quote/in/with\"quote.txt" @@ -0,0 +1 @@ +old diff --git "a/tests/compat/git/path_quoted_inner_quote/out/with\"quote.txt" "b/tests/compat/git/path_quoted_inner_quote/out/with\"quote.txt" new file mode 100644 index 00000000..3e757656 --- /dev/null +++ "b/tests/compat/git/path_quoted_inner_quote/out/with\"quote.txt" @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/path_quoted_named_escape/in/foo.patch b/tests/compat/git/path_quoted_named_escape/in/foo.patch new file mode 100644 index 00000000..cfd3f562 --- /dev/null +++ b/tests/compat/git/path_quoted_named_escape/in/foo.patch @@ -0,0 +1,6 @@ +diff --git "a/bel\a" "b/bel\a" +new file mode 100644 +--- /dev/null ++++ "b/bel\a" +@@ -0,0 +1 @@ ++hello diff --git "a/tests/compat/git/path_quoted_named_escape/out/bel\a" "b/tests/compat/git/path_quoted_named_escape/out/bel\a" new file mode 100644 index 00000000..ce013625 --- /dev/null +++ "b/tests/compat/git/path_quoted_named_escape/out/bel\a" @@ -0,0 +1 @@ +hello diff --git a/tests/compat/git/path_quoted_octal_escape/in/foo.patch 
b/tests/compat/git/path_quoted_octal_escape/in/foo.patch new file mode 100644 index 00000000..5dda9ec6 --- /dev/null +++ b/tests/compat/git/path_quoted_octal_escape/in/foo.patch @@ -0,0 +1,6 @@ +diff --git "a/tl\033" "b/tl\033" +new file mode 100644 +--- /dev/null ++++ "b/tl\033" +@@ -0,0 +1 @@ ++hello diff --git "a/tests/compat/git/path_quoted_octal_escape/out/tl\033" "b/tests/compat/git/path_quoted_octal_escape/out/tl\033" new file mode 100644 index 00000000..ce013625 --- /dev/null +++ "b/tests/compat/git/path_quoted_octal_escape/out/tl\033" @@ -0,0 +1 @@ +hello diff --git a/tests/compat/git/path_with_spaces/in/foo bar.txt b/tests/compat/git/path_with_spaces/in/foo bar.txt new file mode 100644 index 00000000..3367afdb --- /dev/null +++ b/tests/compat/git/path_with_spaces/in/foo bar.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/path_with_spaces/in/foo.patch b/tests/compat/git/path_with_spaces/in/foo.patch new file mode 100644 index 00000000..b3d1d463 --- /dev/null +++ b/tests/compat/git/path_with_spaces/in/foo.patch @@ -0,0 +1,7 @@ +diff --git a/foo bar.txt b/foo bar.txt +index 3367afd..3e75765 100644 +--- a/foo bar.txt ++++ b/foo bar.txt +@@ -1 +1 @@ +-old ++new diff --git a/tests/compat/git/path_with_spaces/out/foo bar.txt b/tests/compat/git/path_with_spaces/out/foo bar.txt new file mode 100644 index 00000000..3e757656 --- /dev/null +++ b/tests/compat/git/path_with_spaces/out/foo bar.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/gnu_patch/format_patch_mbox/in/a.txt b/tests/compat/gnu_patch/format_patch_mbox/in/a.txt new file mode 100644 index 00000000..ffc74965 --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/in/a.txt @@ -0,0 +1 @@ +old a diff --git a/tests/compat/gnu_patch/format_patch_mbox/in/b.txt b/tests/compat/gnu_patch/format_patch_mbox/in/b.txt new file mode 100644 index 00000000..568e6192 --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/in/b.txt @@ -0,0 +1 @@ +old b diff --git 
a/tests/compat/gnu_patch/format_patch_mbox/in/c.txt b/tests/compat/gnu_patch/format_patch_mbox/in/c.txt new file mode 100644 index 00000000..89cb37d7 --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/in/c.txt @@ -0,0 +1 @@ +old c diff --git a/tests/compat/gnu_patch/format_patch_mbox/in/foo.patch b/tests/compat/gnu_patch/format_patch_mbox/in/foo.patch new file mode 100644 index 00000000..720648ea --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/in/foo.patch @@ -0,0 +1,56 @@ +From aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa Mon Sep 17 00:00:00 2001 +From: Alice +Date: Mon, 1 Jan 2024 00:00:00 +0000 +Subject: [PATCH 1/3] first patch + +First commit message. +--- + a.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/a.txt b/a.txt +--- a/a.txt ++++ b/a.txt +@@ -1 +1 @@ +-old a ++new a +-- +2.40.0 + +From bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb Mon Sep 17 00:00:00 2001 +From: Bob +Date: Mon, 1 Jan 2024 00:00:01 +0000 +Subject: [PATCH 2/3] second patch + +Second commit message. +--- + b.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/b.txt b/b.txt +--- a/b.txt ++++ b/b.txt +@@ -1 +1 @@ +-old b ++new b +-- +2.40.0 + +From cccccccccccccccccccccccccccccccccccccccc Mon Sep 17 00:00:00 2001 +From: Carol +Date: Mon, 1 Jan 2024 00:00:02 +0000 +Subject: [PATCH 3/3] third patch + +Third commit message. 
+--- + c.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/c.txt b/c.txt +--- a/c.txt ++++ b/c.txt +@@ -1 +1 @@ +-old c ++new c +-- +2.40.0 diff --git a/tests/compat/gnu_patch/format_patch_mbox/out/a.txt b/tests/compat/gnu_patch/format_patch_mbox/out/a.txt new file mode 100644 index 00000000..56db76ce --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/out/a.txt @@ -0,0 +1 @@ +new a diff --git a/tests/compat/gnu_patch/format_patch_mbox/out/b.txt b/tests/compat/gnu_patch/format_patch_mbox/out/b.txt new file mode 100644 index 00000000..81e3be46 --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/out/b.txt @@ -0,0 +1 @@ +new b diff --git a/tests/compat/gnu_patch/format_patch_mbox/out/c.txt b/tests/compat/gnu_patch/format_patch_mbox/out/c.txt new file mode 100644 index 00000000..8ed67817 --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/out/c.txt @@ -0,0 +1 @@ +new c diff --git a/tests/compat/gnu_patch/mod.rs b/tests/compat/gnu_patch/mod.rs index 643d16e9..3931a414 100644 --- a/tests/compat/gnu_patch/mod.rs +++ b/tests/compat/gnu_patch/mod.rs @@ -158,6 +158,13 @@ fn non_utf8_hunk_content() { Case::gnu_patch("non_utf8_hunk_content").run(); } +// Mbox stream: three concatenated `git format-patch` emails in one file. +// GNU patch finds ---/+++ boundaries regardless of email headers. +#[test] +fn format_patch_mbox() { + Case::gnu_patch("format_patch_mbox").strip(1).run(); +} + // Failure cases #[test] diff --git a/tests/compat/main.rs b/tests/compat/main.rs index 8faf9eac..e35ed079 100644 --- a/tests/compat/main.rs +++ b/tests/compat/main.rs @@ -39,9 +39,11 @@ //! //! 1. Create `case_name/in/` with input file(s) and `foo.patch` //! 2. Run `SNAPSHOTS=overwrite cargo test --test compat` to generate `out/` -//! 3. Add `#[test] fn case_name() { Case::gnu_patch(...).run(); }` in the module +//! 3. Add `#[test] fn case_name() { Case::{gnu_patch,git}(...).run(); }` in the module //! //! 
For failure tests, use `.expect_success(false)` and skip step 2. +//! For intentional compat divergence, use `.expect_compat(false)`. mod common; +mod git; mod gnu_patch; diff --git a/tests/replay.rs b/tests/replay.rs index 38cddd7b..c98eef12 100644 --- a/tests/replay.rs +++ b/tests/replay.rs @@ -18,8 +18,7 @@ //! * A range (e.g., `abc123..def456`) for a specific commit range //! //! Defaults to 200. Use `0` to verify entire history. -//! * `DIFFY_TEST_PARSE_MODE`: Parse mode to use. -//! Currently only `unidiff` is supported. +//! * `DIFFY_TEST_PARSE_MODE`: Parse mode to use (`unidiff` or `gitdiff`). //! Defaults to `unidiff`. //! //! ## Requirements @@ -163,12 +162,14 @@ impl CatFile { #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum TestMode { UniDiff, + GitDiff, } impl From<TestMode> for ParseOptions { fn from(value: TestMode) -> Self { match value { TestMode::UniDiff => ParseOptions::unidiff(), + TestMode::GitDiff => ParseOptions::gitdiff(), } } } @@ -230,7 +231,8 @@ fn test_mode() -> TestMode { }; match val.trim().to_lowercase().as_str() { "unidiff" => TestMode::UniDiff, - _ => panic!("invalid DIFFY_TEST_PARSE_MODE='{val}': expected 'unidiff'"), + "gitdiff" => TestMode::GitDiff, + _ => panic!("invalid DIFFY_TEST_PARSE_MODE='{val}': expected 'unidiff' or 'gitdiff'"), } } @@ -329,8 +331,13 @@ fn process_commit( // UniDiff format cannot express pure renames (no ---/+++ headers). // Use `--no-renames` to represent them as delete + create instead. + // GitDiff mode handles renames via extended headers natively. let diff_output = match mode { TestMode::UniDiff => git_bytes(repo, &["diff", "--no-renames", parent, child]), + // TODO: pass `--binary` once binary patch support lands, + // so binary files get actual delta/literal data instead of + // "Binary files differ" markers.
+ TestMode::GitDiff => git_bytes(repo, &["diff", parent, child]), }; if diff_output.is_empty() { @@ -379,6 +386,20 @@ fn process_commit( } text_files + type_changes } + TestMode::GitDiff => { + // Can't use `--numstat` for GitDiff: it shows `-\t-\t` for both + // actual binary diffs AND pure binary renames (100% similarity). + // Use `--raw` for total count instead. + let raw = git(repo, &["diff", "--raw", parent, child]); + let (mut total, mut type_changes) = (0, 0); + for line in raw.lines().filter(|l| !l.is_empty()) { + total += 1; + if is_type_change(line) { + type_changes += 1; + } + } + total + type_changes + } }; if expected_file_count == 0 { @@ -523,6 +544,10 @@ fn process_commit( ); } } + PatchKind::Binary => { + skipped += 1; + continue; + } } applied += 1; @@ -560,6 +585,7 @@ fn replay() { .unwrap_or_else(|| ".".to_string()); let mode_name = match mode { TestMode::UniDiff => "unidiff", + TestMode::GitDiff => "gitdiff", }; // Shared state for progress reporting