From cdde0e7f05d4410ba01471c86c89a95444abfac8 Mon Sep 17 00:00:00 2001 From: Victor Hallberg Date: Thu, 18 Jun 2026 12:43:04 +0200 Subject: [PATCH] fix: make directive scanner handle quotes in JSX correctly --- Cargo.lock | 1 + crates/lingui_macro/Cargo.toml | 1 + .../lingui_macro/src/comment_directive/mod.rs | 128 +++---- .../src/comment_directive/source_scanner.rs | 320 ------------------ crates/lingui_macro/tests/lingui_directive.rs | 23 ++ ...ctive__jsx_trans_with_unclosed_quotes.snap | 36 ++ 6 files changed, 128 insertions(+), 381 deletions(-) delete mode 100644 crates/lingui_macro/src/comment_directive/source_scanner.rs create mode 100644 crates/lingui_macro/tests/snapshots/lingui_directive__jsx_trans_with_unclosed_quotes.snap diff --git a/Cargo.lock b/Cargo.lock index dba269c..657dbc9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -792,6 +792,7 @@ version = "6.4.0" dependencies = [ "data-encoding", "insta", + "regex", "serde", "serde_json", "sha2 0.11.0", diff --git a/crates/lingui_macro/Cargo.toml b/crates/lingui_macro/Cargo.toml index 9b89883..658ac76 100644 --- a/crates/lingui_macro/Cargo.toml +++ b/crates/lingui_macro/Cargo.toml @@ -8,6 +8,7 @@ crate-type = ["cdylib", "rlib"] [dependencies] data-encoding = "2.11.0" +regex = "1.12.3" sha2 = "0.11.0" serde = "1.0.228" serde_json = "1.0.150" diff --git a/crates/lingui_macro/src/comment_directive/mod.rs b/crates/lingui_macro/src/comment_directive/mod.rs index 68f579c..fef37bf 100644 --- a/crates/lingui_macro/src/comment_directive/mod.rs +++ b/crates/lingui_macro/src/comment_directive/mod.rs @@ -1,12 +1,22 @@ -mod source_scanner; +use std::sync::LazyLock; -use source_scanner::{scan_source_comments, CommentKind}; +use regex::Regex; use swc_core::common::{BytePos, Span}; use swc_core::plugin::errors::HANDLER; -fn is_lingui_directive_prefix(comment: &str) -> bool { - comment.starts_with("lingui-set") || comment.starts_with("lingui-reset") -} +/// Matches a `lingui-set` / `lingui-reset` directive introduced by a line +/// comment (`//`), block comment (`/*`) or JSDoc comment (`/**`). +/// Group 1 is the directive kind, group 2 the rest of the line (params, along with +/// trailing `*/` for block comments that [`parse_lingui_directive`] strips). +/// +/// This is deliberately a plain text scan: it does not understand strings, +/// template literals or JSX, so a directive-looking comment *inside* a string +/// literal is a false positive. This is an intentional trade-off to avoid +/// requiring a full TS+JSX aware lexer pass. +static DIRECTIVE_RE: LazyLock = LazyLock::new(|| { + Regex::new(r"/(?:/|\*\*?)\s*lingui-(set|reset)[ ]*([^\n]*)") + .expect("lingui directive regex is valid") +}); #[derive(Debug, Clone, PartialEq, Eq, Default)] pub struct DirectiveValues { @@ -94,23 +104,14 @@ fn parse_value_update(value: &str) -> DirectiveValueUpdate { } } -fn parse_lingui_directive(comment_value: &str) -> Result, String> { - let trimmed = comment_value.trim(); - - let (directive_name, rest) = if let Some(rest) = trimmed.strip_prefix("lingui-set") { - ("lingui-set", rest) - } else if let Some(rest) = trimmed.strip_prefix("lingui-reset") { - ("lingui-reset", rest) - } else { - return Ok(None); - }; - - if !rest.is_empty() && !rest.starts_with(char::is_whitespace) { - return Ok(None); - } +fn parse_lingui_directive(reset: bool, params: &str) -> Result { + let directive_name = if reset { "lingui-reset" } else { "lingui-set" }; - let reset = directive_name == "lingui-reset"; - let rest = rest.trim(); + // The regex captures everything up to the end of the line, which for a + // block comment includes the trailing `*/`. Strip it so the params parse + // cleanly (and `lingui-reset` with no params is recognised as a bare reset). + let rest = params.trim(); + let rest = rest.strip_suffix("*/").unwrap_or(rest).trim(); let mut values = DirectiveUpdate::default(); let mut has_params = false; @@ -135,7 +136,7 @@ fn parse_lingui_directive(comment_value: &str) -> Result } if pos == key_start { return Err(format!( - "`{directive_name}` directive has invalid syntax: {trimmed}" + "`{directive_name}` directive has invalid syntax: {directive_name} {rest}" )); } let key = &rest[key_start..pos]; @@ -161,7 +162,7 @@ fn parse_lingui_directive(comment_value: &str) -> Result } if pos >= rest_bytes.len() { return Err(format!( - "`{directive_name}` directive has invalid syntax: {trimmed}" + "`{directive_name}` directive has invalid syntax: {directive_name} {rest}" )); } let value = &rest[value_start..pos]; @@ -188,7 +189,7 @@ fn parse_lingui_directive(comment_value: &str) -> Result )); } - Ok(Some(ParsedDirective { reset, values })) + Ok(ParsedDirective { reset, values }) } fn find_directive_for_pos(directives: &[DirectiveEntry], pos: BytePos) -> Option<&DirectiveValues> { @@ -223,22 +224,16 @@ fn collect_lingui_directives_from_source(source: &str, start_pos: BytePos) -> Ve let mut directives = Vec::new(); let mut accumulated = DirectiveValues::default(); - for comment in scan_source_comments(source) { - let comment_start = BytePos(start_pos.0 + comment.byte_offset as u32); - let trimmed = comment.content.trim(); + for caps in DIRECTIVE_RE.captures_iter(source) { + let matched = caps.get(0).expect("group 0 always matches"); + let comment_start = BytePos(start_pos.0 + matched.start() as u32); + let span = Span::new(comment_start, BytePos(start_pos.0 + matched.end() as u32)); - if !is_lingui_directive_prefix(trimmed) { - continue; - } - - let content_end = match comment.kind { - CommentKind::Line => BytePos(comment_start.0 + 2 + comment.content.len() as u32), - CommentKind::Block => BytePos(comment_start.0 + 2 + comment.content.len() as u32 + 2), - }; - let span = Span::new(comment_start, content_end); + let reset = &caps[1] == "reset"; + let params = &caps[2]; - match parse_lingui_directive(trimmed) { - Ok(Some(parsed)) => { + match parse_lingui_directive(reset, params) { + Ok(parsed) => { let mut values = if parsed.reset { DirectiveValues::default() } else { @@ -253,7 +248,6 @@ fn collect_lingui_directives_from_source(source: &str, start_pos: BytePos) -> Ve values, }) } - Ok(None) => {} Err(message) => { HANDLER.with(|handler| handler.struct_span_err(span, &message).emit()); } @@ -270,31 +264,53 @@ mod tests { #[test] fn parse_should_parse_multiple_keys() { let parsed = - parse_lingui_directive(r#" lingui-set context="ctx" comment="cmt" idPrefix="p." "#) - .unwrap(); + parse_lingui_directive(false, r#"context="ctx" comment="cmt" idPrefix="p." "#).unwrap(); assert_eq!( parsed, - Some(ParsedDirective { + ParsedDirective { reset: false, values: DirectiveUpdate { context: Some(DirectiveValueUpdate::Set("ctx".into())), comment: Some(DirectiveValueUpdate::Set("cmt".into())), id_prefix: Some(DirectiveValueUpdate::Set("p.".into())), } - }) + } + ); + } + + #[test] + fn parse_should_strip_trailing_block_comment_terminator() { + let parsed = parse_lingui_directive(false, r#"context="ctx" */"#).unwrap(); + + assert_eq!( + parsed, + ParsedDirective { + reset: false, + values: DirectiveUpdate { + context: Some(DirectiveValueUpdate::Set("ctx".into())), + ..Default::default() + } + } ); } #[test] - fn parse_should_return_none_for_non_directive_comments() { - assert_eq!(parse_lingui_directive(" some comment ").unwrap(), None); - assert_eq!(parse_lingui_directive(" i18n ").unwrap(), None); + fn parse_should_accept_bare_reset() { + let parsed = parse_lingui_directive(true, "*/").unwrap(); + + assert_eq!( + parsed, + ParsedDirective { + reset: true, + values: DirectiveUpdate::default(), + } + ); } #[test] fn parse_should_reject_invalid_syntax() { - let error = parse_lingui_directive(" lingui-set context=single ") + let error = parse_lingui_directive(false, "context=single") .expect_err("expected parser to reject invalid syntax"); assert!(error.contains("requires a value")); @@ -302,7 +318,7 @@ mod tests { #[test] fn parse_should_reject_unknown_params() { - let error = parse_lingui_directive(r#" lingui-set unknown="value" "#) + let error = parse_lingui_directive(false, r#"unknown="value""#) .expect_err("expected parser to reject unknown params"); assert!(error.contains("unknown param \"unknown\"")); @@ -310,18 +326,18 @@ mod tests { #[test] fn parse_should_treat_empty_strings_as_unset() { - let parsed = parse_lingui_directive(r#" lingui-set context="" comment="note" "#).unwrap(); + let parsed = parse_lingui_directive(false, r#"context="" comment="note""#).unwrap(); assert_eq!( parsed, - Some(ParsedDirective { + ParsedDirective { reset: false, values: DirectiveUpdate { context: Some(DirectiveValueUpdate::Unset), comment: Some(DirectiveValueUpdate::Set("note".into())), id_prefix: None, } - }) + } ); } @@ -345,16 +361,6 @@ mod tests { ); } - #[test] - fn collect_from_source_should_ignore_template_text_that_looks_like_comment() { - let directives = collect_lingui_directives_from_source( - "const msg = `\n// lingui-set context=\"ctx\"\n`;\n", - BytePos(10), - ); - - assert_eq!(directives, vec![]); - } - #[test] fn collect_should_merge_and_reset_directives() { let directives = collect_lingui_directives_from_source( diff --git a/crates/lingui_macro/src/comment_directive/source_scanner.rs b/crates/lingui_macro/src/comment_directive/source_scanner.rs deleted file mode 100644 index 94bd03f..0000000 --- a/crates/lingui_macro/src/comment_directive/source_scanner.rs +++ /dev/null @@ -1,320 +0,0 @@ -pub struct SourceComment<'a> { - pub byte_offset: usize, - pub content: &'a str, - pub kind: CommentKind, -} - -pub enum CommentKind { - Line, - Block, -} - -pub fn scan_source_comments(source: &str) -> Vec> { - let bytes = source.as_bytes(); - let mut comments = Vec::new(); - let mut index = 0usize; - - while index < bytes.len() { - match bytes[index] { - b'\'' | b'"' => { - index = skip_string_literal(bytes, index); - } - b'`' => { - index = skip_template_literal(bytes, index); - } - b'/' if bytes.get(index + 1) == Some(&b'/') => { - let comment_start = index; - let content_start = index + 2; - index = content_start; - - while index < bytes.len() && bytes[index] != b'\n' { - index += 1; - } - - comments.push(SourceComment { - byte_offset: comment_start, - content: &source[content_start..index], - kind: CommentKind::Line, - }); - } - b'/' if bytes.get(index + 1) == Some(&b'*') => { - let comment_start = index; - let content_start = index + 2; - index = content_start; - - while index < bytes.len() { - if bytes[index] == b'*' && bytes.get(index + 1) == Some(&b'/') { - break; - } - index += 1; - } - - let content_end = index; - if index < bytes.len() { - index += 2; - } - - comments.push(SourceComment { - byte_offset: comment_start, - content: &source[content_start..content_end], - kind: CommentKind::Block, - }); - } - _ => { - index += 1; - } - } - } - - comments -} - -fn skip_string_literal(bytes: &[u8], start: usize) -> usize { - let delim = bytes[start]; - let mut i = start + 1; - while i < bytes.len() { - if bytes[i] == b'\\' { - i = (i + 2).min(bytes.len()); - } else if bytes[i] == delim { - return i + 1; - } else { - i += 1; - } - } - i -} - -fn skip_template_literal(bytes: &[u8], start: usize) -> usize { - let mut i = start + 1; - while i < bytes.len() { - if bytes[i] == b'\\' { - i = (i + 2).min(bytes.len()); - } else if bytes[i] == b'`' { - return i + 1; - } else { - i += 1; - } - } - i -} - -#[cfg(test)] -mod tests { - use super::*; - - fn line_comments(source: &str) -> Vec<(usize, &str)> { - scan_source_comments(source) - .into_iter() - .filter(|c| matches!(c.kind, CommentKind::Line)) - .map(|c| (c.byte_offset, c.content)) - .collect() - } - - fn block_comments(source: &str) -> Vec<(usize, &str)> { - scan_source_comments(source) - .into_iter() - .filter(|c| matches!(c.kind, CommentKind::Block)) - .map(|c| (c.byte_offset, c.content)) - .collect() - } - - fn all_comments(source: &str) -> Vec<(usize, &str)> { - scan_source_comments(source) - .into_iter() - .map(|c| (c.byte_offset, c.content)) - .collect() - } - - #[test] - fn empty_source() { - assert_eq!(all_comments(""), Vec::<(usize, &str)>::new()); - } - - #[test] - fn no_comments() { - assert_eq!(all_comments("const x = 1;\nlet y = 2;"), vec![]); - } - - #[test] - fn single_line_comment() { - assert_eq!(line_comments("// hello world"), vec![(0, " hello world")]); - } - - #[test] - fn line_comment_after_code() { - assert_eq!( - line_comments("const x = 1; // inline"), - vec![(13, " inline")] - ); - } - - #[test] - fn multiple_line_comments() { - let source = "// first\n// second\ncode\n// third"; - assert_eq!( - line_comments(source), - vec![(0, " first"), (9, " second"), (24, " third")] - ); - } - - #[test] - fn single_block_comment() { - assert_eq!(block_comments("/* block */"), vec![(0, " block ")]); - } - - #[test] - fn multiline_block_comment() { - let source = "/* line1\n line2 */"; - assert_eq!(block_comments(source), vec![(0, " line1\n line2 ")]); - } - - #[test] - fn block_comment_after_code() { - assert_eq!( - block_comments("x = 1; /* note */ y = 2;"), - vec![(7, " note ")] - ); - } - - #[test] - fn ignores_comment_syntax_in_single_quoted_string() { - assert_eq!(all_comments("const x = '// not a comment';"), vec![]); - assert_eq!(all_comments("const x = '/* not a comment */';"), vec![]); - } - - #[test] - fn ignores_comment_syntax_in_double_quoted_string() { - assert_eq!(all_comments(r#"const x = "// not a comment";"#), vec![]); - assert_eq!(all_comments(r#"const x = "/* not a comment */";"#), vec![]); - } - - #[test] - fn ignores_comment_syntax_in_template_literal() { - assert_eq!(all_comments("const x = `// not a comment`;"), vec![]); - assert_eq!(all_comments("const x = `/* not a comment */`;"), vec![]); - } - - #[test] - fn handles_escaped_quotes_in_single_quoted_string() { - assert_eq!( - all_comments(r"const x = 'it\'s'; // after"), - vec![(19, " after")] - ); - } - - #[test] - fn handles_escaped_quotes_in_double_quoted_string() { - assert_eq!( - all_comments(r#"const x = "say \"hi\""; // after"#), - vec![(24, " after")] - ); - } - - #[test] - fn handles_escaped_backtick_in_template_literal() { - assert_eq!( - all_comments(r"const x = `\`template\``; // after"), - vec![(26, " after")] - ); - } - - #[test] - fn handles_backslash_at_end_of_string() { - // String ending with escape at EOF (unterminated) - assert_eq!(all_comments(r"const x = '\"), vec![]); - } - - #[test] - fn handles_backslash_at_end_of_template() { - assert_eq!(all_comments("const x = `\\"), vec![]); - } - - #[test] - fn unterminated_single_quoted_string() { - // No closing quote — scanner shouldn't panic - assert_eq!(all_comments("const x = 'unterminated // nope"), vec![]); - } - - #[test] - fn unterminated_double_quoted_string() { - assert_eq!(all_comments(r#"const x = "unterminated // nope"#), vec![]); - } - - #[test] - fn unterminated_template_literal() { - assert_eq!(all_comments("const x = `unterminated // nope"), vec![]); - } - - #[test] - fn unterminated_block_comment() { - // Block comment that never closes — content runs to end - assert_eq!( - block_comments("/* never closed"), - vec![(0, " never closed")] - ); - } - - #[test] - fn mixed_comment_types() { - let source = "// line\n/* block */\ncode // inline"; - let comments = all_comments(source); - assert_eq!( - comments, - vec![(0, " line"), (8, " block "), (25, " inline")] - ); - } - - #[test] - fn slash_not_followed_by_slash_or_star() { - // Division operator should not be mistaken for comment - assert_eq!(all_comments("const x = 10 / 2;"), vec![]); - } - - #[test] - fn empty_line_comment() { - assert_eq!(line_comments("//\ncode"), vec![(0, "")]); - } - - #[test] - fn empty_block_comment() { - assert_eq!(block_comments("/**/"), vec![(0, "")]); - } - - #[test] - fn block_comment_with_star_inside() { - assert_eq!(block_comments("/* a * b */"), vec![(0, " a * b ")]); - } - - #[test] - fn consecutive_block_comments() { - // "/* a */" = 7 bytes, so second comment starts at offset 7 - assert_eq!( - block_comments("/* a *//* b */"), - vec![(0, " a "), (7, " b ")] - ); - } - - #[test] - fn line_comment_at_eof_without_newline() { - assert_eq!(line_comments("// eof"), vec![(0, " eof")]); - } - - #[test] - fn comment_after_template_literal_with_expressions() { - // Template with ${} — the simplified scanner treats it as text until closing backtick - let source = "const x = `hello ${world}`; // after"; - assert_eq!(line_comments(source), vec![(28, " after")]); - } - - #[test] - fn multiline_template_literal_with_comment_like_content() { - let source = "const x = `\n// fake\n/* also fake */\n`;\n// real"; - assert_eq!(line_comments(source), vec![(39, " real")]); - } - - #[test] - fn string_containing_backslash_n_is_not_newline() { - // The literal text \n in a string (escaped), not a real newline - assert_eq!(all_comments("const x = '\\n'; // yes"), vec![(16, " yes")]); - } -} diff --git a/crates/lingui_macro/tests/lingui_directive.rs b/crates/lingui_macro/tests/lingui_directive.rs index 6865119..5aac9ea 100644 --- a/crates/lingui_macro/tests/lingui_directive.rs +++ b/crates/lingui_macro/tests/lingui_directive.rs @@ -253,6 +253,29 @@ to!( "# ); +to!( + jsx_trans_with_unclosed_quotes, + LinguiOptions { + id_prefix_leader: Some(".".into()), + ..Default::default() + }, + r#" + // lingui-set idPrefix="root" + import type { MessageDescriptor } from '@lingui/core' + import { msg, t } from '@lingui/core/macro' + import { Trans } from '@lingui/react/macro' + + const X = () =>

'

+ const Y = () =>

`

+ + // lingui-set idPrefix="different" + const different = { + a: msg({ id: '.a', message: `different a` }), + b: msg({ id: '.b', message: `different b` }), + } as const satisfies Record + "# +); + to!( jsx_trans_with_directive_context, r#" diff --git a/crates/lingui_macro/tests/snapshots/lingui_directive__jsx_trans_with_unclosed_quotes.snap b/crates/lingui_macro/tests/snapshots/lingui_directive__jsx_trans_with_unclosed_quotes.snap new file mode 100644 index 0000000..47d14ec --- /dev/null +++ b/crates/lingui_macro/tests/snapshots/lingui_directive__jsx_trans_with_unclosed_quotes.snap @@ -0,0 +1,36 @@ +--- +source: crates/lingui_macro/tests/lingui_directive.rs +info: + id_prefix_leader: "." +--- +// lingui-set idPrefix="root" +import type { MessageDescriptor } from '@lingui/core' +import { msg, t } from '@lingui/core/macro' +import { Trans } from '@lingui/react/macro' + +const X = () =>

'

+const Y = () =>

`

+ +// lingui-set idPrefix="different" +const different = { + a: msg({ id: '.a', message: `different a` }), + b: msg({ id: '.b', message: `different b` }), +} as const satisfies Record + +↓ ↓ ↓ ↓ ↓ ↓ + +// lingui-set idPrefix="root" +import type { MessageDescriptor } from '@lingui/core'; +const X = ()=>

'

; +const Y = ()=>

`

; +// lingui-set idPrefix="different" +const different = { + a: /*i18n*/ { + id: "different.a", + message: "different a" + }, + b: /*i18n*/ { + id: "different.b", + message: "different b" + } +} as const satisfies Record;