From cdde0e7f05d4410ba01471c86c89a95444abfac8 Mon Sep 17 00:00:00 2001
From: Victor Hallberg <victor@hallberg.cc>
Date: Thu, 18 Jun 2026 12:43:04 +0200
Subject: [PATCH] fix: make directive scanner handle quotes in JSX correctly

---
 Cargo.lock                                    |   1 +
 crates/lingui_macro/Cargo.toml                |   1 +
 .../lingui_macro/src/comment_directive/mod.rs | 128 +++----
 .../src/comment_directive/source_scanner.rs   | 320 ------------------
 crates/lingui_macro/tests/lingui_directive.rs |  23 ++
 ...ctive__jsx_trans_with_unclosed_quotes.snap |  36 ++
 6 files changed, 128 insertions(+), 381 deletions(-)
 delete mode 100644 crates/lingui_macro/src/comment_directive/source_scanner.rs
 create mode 100644 crates/lingui_macro/tests/snapshots/lingui_directive__jsx_trans_with_unclosed_quotes.snap
diff --git a/Cargo.lock b/Cargo.lock
index dba269c..657dbc9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -792,6 +792,7 @@ version = "6.4.0"
 dependencies = [
  "data-encoding",
  "insta",
+ "regex",
  "serde",
  "serde_json",
  "sha2 0.11.0",
diff --git a/crates/lingui_macro/Cargo.toml b/crates/lingui_macro/Cargo.toml
index 9b89883..658ac76 100644
--- a/crates/lingui_macro/Cargo.toml
+++ b/crates/lingui_macro/Cargo.toml
@@ -8,6 +8,7 @@ crate-type = ["cdylib", "rlib"]
 
 [dependencies]
 data-encoding = "2.11.0"
+regex = "1.12.3"
 sha2 = "0.11.0"
 serde = "1.0.228"
 serde_json = "1.0.150"
diff --git a/crates/lingui_macro/src/comment_directive/mod.rs b/crates/lingui_macro/src/comment_directive/mod.rs
index 68f579c..fef37bf 100644
--- a/crates/lingui_macro/src/comment_directive/mod.rs
+++ b/crates/lingui_macro/src/comment_directive/mod.rs
@@ -1,12 +1,22 @@
-mod source_scanner;
+use std::sync::LazyLock;
 
-use source_scanner::{scan_source_comments, CommentKind};
+use regex::Regex;
 use swc_core::common::{BytePos, Span};
 use swc_core::plugin::errors::HANDLER;
 
-fn is_lingui_directive_prefix(comment: &str) -> bool {
-    comment.starts_with("lingui-set") || comment.starts_with("lingui-reset")
-}
+/// Matches a `lingui-set` / `lingui-reset` directive introduced by a line comment
+/// (`//`), block comment (`/*`) or JSDoc comment (`/**`). Group 1 is the kind; group 2
+/// (always present, possibly empty) is the params, ending at the first `*/` or at the
+/// line end, so `{/* lingui-set ... */}` and code after a block comment parse cleanly.
+/// The kind must be followed by a space/tab, `*/` or a line end, so look-alikes such
+/// as `lingui-settings` are skipped rather than reported as malformed directives.
+/// Deliberately a plain text scan with no string, template literal or JSX awareness:
+/// a directive inside a string literal is a false positive, and a `*/` inside a value
+/// cuts the params short. Intentional trade-off to avoid a full TS+JSX lexer pass.
+static DIRECTIVE_RE: LazyLock<Regex> = LazyLock::new(|| {
+    Regex::new(r"(?m)/(?:/|\*\*?)\s*lingui-(set|reset)((?:[ \t][^\n]*?)?)(?:\*/|\r?$)")
+        .expect("lingui directive regex is valid")
+});
 
 #[derive(Debug, Clone, PartialEq, Eq, Default)]
 pub struct DirectiveValues {
@@ -94,23 +104,14 @@ fn parse_value_update(value: &str) -> DirectiveValueUpdate {
     }
 }
 
-fn parse_lingui_directive(comment_value: &str) -> Result<Option<ParsedDirective>, String> {
-    let trimmed = comment_value.trim();
-
-    let (directive_name, rest) = if let Some(rest) = trimmed.strip_prefix("lingui-set") {
-        ("lingui-set", rest)
-    } else if let Some(rest) = trimmed.strip_prefix("lingui-reset") {
-        ("lingui-reset", rest)
-    } else {
-        return Ok(None);
-    };
-
-    if !rest.is_empty() && !rest.starts_with(char::is_whitespace) {
-        return Ok(None);
-    }
+fn parse_lingui_directive(reset: bool, params: &str) -> Result<ParsedDirective, String> {
+    let directive_name = if reset { "lingui-reset" } else { "lingui-set" };
 
-    let reset = directive_name == "lingui-reset";
-    let rest = rest.trim();
+    // The regex captures everything up to the end of the line, which for a
+    // block comment includes the trailing `*/`. Strip it so the params parse
+    // cleanly (and `lingui-reset` with no params is recognised as a bare reset).
+    let rest = params.trim();
+    let rest = rest.strip_suffix("*/").unwrap_or(rest).trim();
 
     let mut values = DirectiveUpdate::default();
     let mut has_params = false;
@@ -135,7 +136,7 @@ fn parse_lingui_directive(comment_value: &str) -> Result<Option<ParsedDirective>
         }
         if pos == key_start {
             return Err(format!(
-                "`{directive_name}` directive has invalid syntax: {trimmed}"
+                "`{directive_name}` directive has invalid syntax: {directive_name} {rest}"
             ));
         }
         let key = &rest[key_start..pos];
@@ -161,7 +162,7 @@ fn parse_lingui_directive(comment_value: &str) -> Result<Option<ParsedDirective>
         }
         if pos >= rest_bytes.len() {
             return Err(format!(
-                "`{directive_name}` directive has invalid syntax: {trimmed}"
+                "`{directive_name}` directive has invalid syntax: {directive_name} {rest}"
             ));
         }
         let value = &rest[value_start..pos];
@@ -188,7 +189,7 @@ fn parse_lingui_directive(comment_value: &str) -> Result<Option<ParsedDirective>
         ));
     }
 
-    Ok(Some(ParsedDirective { reset, values }))
+    Ok(ParsedDirective { reset, values })
 }
 
 fn find_directive_for_pos(directives: &[DirectiveEntry], pos: BytePos) -> Option<&DirectiveValues> {
@@ -223,22 +224,16 @@ fn collect_lingui_directives_from_source(source: &str, start_pos: BytePos) -> Ve
     let mut directives = Vec::new();
     let mut accumulated = DirectiveValues::default();
 
-    for comment in scan_source_comments(source) {
-        let comment_start = BytePos(start_pos.0 + comment.byte_offset as u32);
-        let trimmed = comment.content.trim();
+    for caps in DIRECTIVE_RE.captures_iter(source) {
+        let matched = caps.get(0).expect("group 0 always matches");
+        let comment_start = BytePos(start_pos.0 + matched.start() as u32);
+        let span = Span::new(comment_start, BytePos(start_pos.0 + matched.end() as u32));
 
-        if !is_lingui_directive_prefix(trimmed) {
-            continue;
-        }
-
-        let content_end = match comment.kind {
-            CommentKind::Line => BytePos(comment_start.0 + 2 + comment.content.len() as u32),
-            CommentKind::Block => BytePos(comment_start.0 + 2 + comment.content.len() as u32 + 2),
-        };
-        let span = Span::new(comment_start, content_end);
+        let reset = &caps[1] == "reset";
+        let params = &caps[2];
 
-        match parse_lingui_directive(trimmed) {
-            Ok(Some(parsed)) => {
+        match parse_lingui_directive(reset, params) {
+            Ok(parsed) => {
                 let mut values = if parsed.reset {
                     DirectiveValues::default()
                 } else {
@@ -253,7 +248,6 @@ fn collect_lingui_directives_from_source(source: &str, start_pos: BytePos) -> Ve
                     values,
                 })
             }
-            Ok(None) => {}
             Err(message) => {
                 HANDLER.with(|handler| handler.struct_span_err(span, &message).emit());
             }
@@ -270,31 +264,53 @@ mod tests {
     #[test]
     fn parse_should_parse_multiple_keys() {
         let parsed =
-            parse_lingui_directive(r#" lingui-set context="ctx" comment="cmt" idPrefix="p." "#)
-                .unwrap();
+            parse_lingui_directive(false, r#"context="ctx" comment="cmt" idPrefix="p." "#).unwrap();
 
         assert_eq!(
             parsed,
-            Some(ParsedDirective {
+            ParsedDirective {
                 reset: false,
                 values: DirectiveUpdate {
                     context: Some(DirectiveValueUpdate::Set("ctx".into())),
                     comment: Some(DirectiveValueUpdate::Set("cmt".into())),
                     id_prefix: Some(DirectiveValueUpdate::Set("p.".into())),
                 }
-            })
+            }
+        );
+    }
+
+    #[test]
+    fn parse_should_strip_trailing_block_comment_terminator() {
+        let parsed = parse_lingui_directive(false, r#"context="ctx" */"#).unwrap();
+
+        assert_eq!(
+            parsed,
+            ParsedDirective {
+                reset: false,
+                values: DirectiveUpdate {
+                    context: Some(DirectiveValueUpdate::Set("ctx".into())),
+                    ..Default::default()
+                }
+            }
         );
     }
 
     #[test]
-    fn parse_should_return_none_for_non_directive_comments() {
-        assert_eq!(parse_lingui_directive(" some comment ").unwrap(), None);
-        assert_eq!(parse_lingui_directive(" i18n ").unwrap(), None);
+    fn parse_should_accept_bare_reset() {
+        let parsed = parse_lingui_directive(true, "*/").unwrap();
+
+        assert_eq!(
+            parsed,
+            ParsedDirective {
+                reset: true,
+                values: DirectiveUpdate::default(),
+            }
+        );
     }
 
     #[test]
     fn parse_should_reject_invalid_syntax() {
-        let error = parse_lingui_directive(" lingui-set context=single ")
+        let error = parse_lingui_directive(false, "context=single")
             .expect_err("expected parser to reject invalid syntax");
 
         assert!(error.contains("requires a value"));
@@ -302,7 +318,7 @@ mod tests {
 
     #[test]
     fn parse_should_reject_unknown_params() {
-        let error = parse_lingui_directive(r#" lingui-set unknown="value" "#)
+        let error = parse_lingui_directive(false, r#"unknown="value""#)
             .expect_err("expected parser to reject unknown params");
 
         assert!(error.contains("unknown param \"unknown\""));
@@ -310,18 +326,18 @@ mod tests {
 
     #[test]
     fn parse_should_treat_empty_strings_as_unset() {
-        let parsed = parse_lingui_directive(r#" lingui-set context="" comment="note" "#).unwrap();
+        let parsed = parse_lingui_directive(false, r#"context="" comment="note""#).unwrap();
 
         assert_eq!(
             parsed,
-            Some(ParsedDirective {
+            ParsedDirective {
                 reset: false,
                 values: DirectiveUpdate {
                     context: Some(DirectiveValueUpdate::Unset),
                     comment: Some(DirectiveValueUpdate::Set("note".into())),
                     id_prefix: None,
                 }
-            })
+            }
         );
     }
 
@@ -345,16 +361,6 @@ mod tests {
         );
     }
 
-    #[test]
-    fn collect_from_source_should_ignore_template_text_that_looks_like_comment() {
-        let directives = collect_lingui_directives_from_source(
-            "const msg = `\n// lingui-set context=\"ctx\"\n`;\n",
-            BytePos(10),
-        );
-
-        assert_eq!(directives, vec![]);
-    }
-
     #[test]
     fn collect_should_merge_and_reset_directives() {
         let directives = collect_lingui_directives_from_source(
diff --git a/crates/lingui_macro/src/comment_directive/source_scanner.rs b/crates/lingui_macro/src/comment_directive/source_scanner.rs
deleted file mode 100644
index 94bd03f..0000000
--- a/crates/lingui_macro/src/comment_directive/source_scanner.rs
+++ /dev/null
@@ -1,320 +0,0 @@
-pub struct SourceComment<'a> {
-    pub byte_offset: usize,
-    pub content: &'a str,
-    pub kind: CommentKind,
-}
-
-pub enum CommentKind {
-    Line,
-    Block,
-}
-
-pub fn scan_source_comments(source: &str) -> Vec<SourceComment<'_>> {
-    let bytes = source.as_bytes();
-    let mut comments = Vec::new();
-    let mut index = 0usize;
-
-    while index < bytes.len() {
-        match bytes[index] {
-            b'\'' | b'"' => {
-                index = skip_string_literal(bytes, index);
-            }
-            b'`' => {
-                index = skip_template_literal(bytes, index);
-            }
-            b'/' if bytes.get(index + 1) == Some(&b'/') => {
-                let comment_start = index;
-                let content_start = index + 2;
-                index = content_start;
-
-                while index < bytes.len() && bytes[index] != b'\n' {
-                    index += 1;
-                }
-
-                comments.push(SourceComment {
-                    byte_offset: comment_start,
-                    content: &source[content_start..index],
-                    kind: CommentKind::Line,
-                });
-            }
-            b'/' if bytes.get(index + 1) == Some(&b'*') => {
-                let comment_start = index;
-                let content_start = index + 2;
-                index = content_start;
-
-                while index < bytes.len() {
-                    if bytes[index] == b'*' && bytes.get(index + 1) == Some(&b'/') {
-                        break;
-                    }
-                    index += 1;
-                }
-
-                let content_end = index;
-                if index < bytes.len() {
-                    index += 2;
-                }
-
-                comments.push(SourceComment {
-                    byte_offset: comment_start,
-                    content: &source[content_start..content_end],
-                    kind: CommentKind::Block,
-                });
-            }
-            _ => {
-                index += 1;
-            }
-        }
-    }
-
-    comments
-}
-
-fn skip_string_literal(bytes: &[u8], start: usize) -> usize {
-    let delim = bytes[start];
-    let mut i = start + 1;
-    while i < bytes.len() {
-        if bytes[i] == b'\\' {
-            i = (i + 2).min(bytes.len());
-        } else if bytes[i] == delim {
-            return i + 1;
-        } else {
-            i += 1;
-        }
-    }
-    i
-}
-
-fn skip_template_literal(bytes: &[u8], start: usize) -> usize {
-    let mut i = start + 1;
-    while i < bytes.len() {
-        if bytes[i] == b'\\' {
-            i = (i + 2).min(bytes.len());
-        } else if bytes[i] == b'`' {
-            return i + 1;
-        } else {
-            i += 1;
-        }
-    }
-    i
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn line_comments(source: &str) -> Vec<(usize, &str)> {
-        scan_source_comments(source)
-            .into_iter()
-            .filter(|c| matches!(c.kind, CommentKind::Line))
-            .map(|c| (c.byte_offset, c.content))
-            .collect()
-    }
-
-    fn block_comments(source: &str) -> Vec<(usize, &str)> {
-        scan_source_comments(source)
-            .into_iter()
-            .filter(|c| matches!(c.kind, CommentKind::Block))
-            .map(|c| (c.byte_offset, c.content))
-            .collect()
-    }
-
-    fn all_comments(source: &str) -> Vec<(usize, &str)> {
-        scan_source_comments(source)
-            .into_iter()
-            .map(|c| (c.byte_offset, c.content))
-            .collect()
-    }
-
-    #[test]
-    fn empty_source() {
-        assert_eq!(all_comments(""), Vec::<(usize, &str)>::new());
-    }
-
-    #[test]
-    fn no_comments() {
-        assert_eq!(all_comments("const x = 1;\nlet y = 2;"), vec![]);
-    }
-
-    #[test]
-    fn single_line_comment() {
-        assert_eq!(line_comments("// hello world"), vec![(0, " hello world")]);
-    }
-
-    #[test]
-    fn line_comment_after_code() {
-        assert_eq!(
-            line_comments("const x = 1; // inline"),
-            vec![(13, " inline")]
-        );
-    }
-
-    #[test]
-    fn multiple_line_comments() {
-        let source = "// first\n// second\ncode\n// third";
-        assert_eq!(
-            line_comments(source),
-            vec![(0, " first"), (9, " second"), (24, " third")]
-        );
-    }
-
-    #[test]
-    fn single_block_comment() {
-        assert_eq!(block_comments("/* block */"), vec![(0, " block ")]);
-    }
-
-    #[test]
-    fn multiline_block_comment() {
-        let source = "/* line1\n   line2 */";
-        assert_eq!(block_comments(source), vec![(0, " line1\n   line2 ")]);
-    }
-
-    #[test]
-    fn block_comment_after_code() {
-        assert_eq!(
-            block_comments("x = 1; /* note */ y = 2;"),
-            vec![(7, " note ")]
-        );
-    }
-
-    #[test]
-    fn ignores_comment_syntax_in_single_quoted_string() {
-        assert_eq!(all_comments("const x = '// not a comment';"), vec![]);
-        assert_eq!(all_comments("const x = '/* not a comment */';"), vec![]);
-    }
-
-    #[test]
-    fn ignores_comment_syntax_in_double_quoted_string() {
-        assert_eq!(all_comments(r#"const x = "// not a comment";"#), vec![]);
-        assert_eq!(all_comments(r#"const x = "/* not a comment */";"#), vec![]);
-    }
-
-    #[test]
-    fn ignores_comment_syntax_in_template_literal() {
-        assert_eq!(all_comments("const x = `// not a comment`;"), vec![]);
-        assert_eq!(all_comments("const x = `/* not a comment */`;"), vec![]);
-    }
-
-    #[test]
-    fn handles_escaped_quotes_in_single_quoted_string() {
-        assert_eq!(
-            all_comments(r"const x = 'it\'s'; // after"),
-            vec![(19, " after")]
-        );
-    }
-
-    #[test]
-    fn handles_escaped_quotes_in_double_quoted_string() {
-        assert_eq!(
-            all_comments(r#"const x = "say \"hi\""; // after"#),
-            vec![(24, " after")]
-        );
-    }
-
-    #[test]
-    fn handles_escaped_backtick_in_template_literal() {
-        assert_eq!(
-            all_comments(r"const x = `\`template\``; // after"),
-            vec![(26, " after")]
-        );
-    }
-
-    #[test]
-    fn handles_backslash_at_end_of_string() {
-        // String ending with escape at EOF (unterminated)
-        assert_eq!(all_comments(r"const x = '\"), vec![]);
-    }
-
-    #[test]
-    fn handles_backslash_at_end_of_template() {
-        assert_eq!(all_comments("const x = `\\"), vec![]);
-    }
-
-    #[test]
-    fn unterminated_single_quoted_string() {
-        // No closing quote — scanner shouldn't panic
-        assert_eq!(all_comments("const x = 'unterminated // nope"), vec![]);
-    }
-
-    #[test]
-    fn unterminated_double_quoted_string() {
-        assert_eq!(all_comments(r#"const x = "unterminated // nope"#), vec![]);
-    }
-
-    #[test]
-    fn unterminated_template_literal() {
-        assert_eq!(all_comments("const x = `unterminated // nope"), vec![]);
-    }
-
-    #[test]
-    fn unterminated_block_comment() {
-        // Block comment that never closes — content runs to end
-        assert_eq!(
-            block_comments("/* never closed"),
-            vec![(0, " never closed")]
-        );
-    }
-
-    #[test]
-    fn mixed_comment_types() {
-        let source = "// line\n/* block */\ncode // inline";
-        let comments = all_comments(source);
-        assert_eq!(
-            comments,
-            vec![(0, " line"), (8, " block "), (25, " inline")]
-        );
-    }
-
-    #[test]
-    fn slash_not_followed_by_slash_or_star() {
-        // Division operator should not be mistaken for comment
-        assert_eq!(all_comments("const x = 10 / 2;"), vec![]);
-    }
-
-    #[test]
-    fn empty_line_comment() {
-        assert_eq!(line_comments("//\ncode"), vec![(0, "")]);
-    }
-
-    #[test]
-    fn empty_block_comment() {
-        assert_eq!(block_comments("/**/"), vec![(0, "")]);
-    }
-
-    #[test]
-    fn block_comment_with_star_inside() {
-        assert_eq!(block_comments("/* a * b */"), vec![(0, " a * b ")]);
-    }
-
-    #[test]
-    fn consecutive_block_comments() {
-        // "/* a */" = 7 bytes, so second comment starts at offset 7
-        assert_eq!(
-            block_comments("/* a *//* b */"),
-            vec![(0, " a "), (7, " b ")]
-        );
-    }
-
-    #[test]
-    fn line_comment_at_eof_without_newline() {
-        assert_eq!(line_comments("// eof"), vec![(0, " eof")]);
-    }
-
-    #[test]
-    fn comment_after_template_literal_with_expressions() {
-        // Template with ${} — the simplified scanner treats it as text until closing backtick
-        let source = "const x = `hello ${world}`; // after";
-        assert_eq!(line_comments(source), vec![(28, " after")]);
-    }
-
-    #[test]
-    fn multiline_template_literal_with_comment_like_content() {
-        let source = "const x = `\n// fake\n/* also fake */\n`;\n// real";
-        assert_eq!(line_comments(source), vec![(39, " real")]);
-    }
-
-    #[test]
-    fn string_containing_backslash_n_is_not_newline() {
-        // The literal text \n in a string (escaped), not a real newline
-        assert_eq!(all_comments("const x = '\\n'; // yes"), vec![(16, " yes")]);
-    }
-}
diff --git a/crates/lingui_macro/tests/lingui_directive.rs b/crates/lingui_macro/tests/lingui_directive.rs
index 6865119..5aac9ea 100644
--- a/crates/lingui_macro/tests/lingui_directive.rs
+++ b/crates/lingui_macro/tests/lingui_directive.rs
@@ -253,6 +253,29 @@ to!(
     "#
 );
 
+to!(
+    jsx_trans_with_unclosed_quotes,
+    LinguiOptions {
+        id_prefix_leader: Some(".".into()),
+        ..Default::default()
+    },
+    r#"
+      // lingui-set idPrefix="root"
+      import type { MessageDescriptor } from '@lingui/core'
+      import { msg, t } from '@lingui/core/macro'
+      import { Trans } from '@lingui/react/macro'
+
+      const X = () => <p>'</p>
+      const Y = () => <p>`</p>
+
+      // lingui-set idPrefix="different"
+      const different = {
+        a: msg({ id: '.a', message: `different a` }),
+        b: msg({ id: '.b', message: `different b` }),
+      } as const satisfies Record<string, MessageDescriptor>
+    "#
+);
+
 to!(
     jsx_trans_with_directive_context,
     r#"
diff --git a/crates/lingui_macro/tests/snapshots/lingui_directive__jsx_trans_with_unclosed_quotes.snap b/crates/lingui_macro/tests/snapshots/lingui_directive__jsx_trans_with_unclosed_quotes.snap
new file mode 100644
index 0000000..47d14ec
--- /dev/null
+++ b/crates/lingui_macro/tests/snapshots/lingui_directive__jsx_trans_with_unclosed_quotes.snap
@@ -0,0 +1,36 @@
+---
+source: crates/lingui_macro/tests/lingui_directive.rs
+info:
+  id_prefix_leader: "."
+---
+// lingui-set idPrefix="root"
+import type { MessageDescriptor } from '@lingui/core'
+import { msg, t } from '@lingui/core/macro'
+import { Trans } from '@lingui/react/macro'
+
+const X = () => <p>'</p>
+const Y = () => <p>`</p>
+
+// lingui-set idPrefix="different"
+const different = {
+  a: msg({ id: '.a', message: `different a` }),
+  b: msg({ id: '.b', message: `different b` }),
+} as const satisfies Record<string, MessageDescriptor>
+
+↓ ↓ ↓ ↓ ↓ ↓
+
+// lingui-set idPrefix="root"
+import type { MessageDescriptor } from '@lingui/core';
+const X = ()=><p>'</p>;
+const Y = ()=><p>`</p>;
+// lingui-set idPrefix="different"
+const different = {
+    a: /*i18n*/ {
+        id: "different.a",
+        message: "different a"
+    },
+    b: /*i18n*/ {
+        id: "different.b",
+        message: "different b"
+    }
+} as const satisfies Record<string, MessageDescriptor>;