From c66bbf1fffdfe2d13f83d9195b199accef9b0faa Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Sun, 22 Feb 2026 08:07:28 +0530 Subject: [PATCH 1/4] formatting --- inst/parseWilkinsonFormula.m | 42 ++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/inst/parseWilkinsonFormula.m b/inst/parseWilkinsonFormula.m index 91315e40..f38382e2 100644 --- a/inst/parseWilkinsonFormula.m +++ b/inst/parseWilkinsonFormula.m @@ -201,7 +201,7 @@ else lhs_vars = resolve_lhs_symbolic (lhs_str); endif - + ## build the required output. varargout{1} = run_equation_builder (lhs_vars, rhs_terms); @@ -519,14 +519,14 @@ args_str_parts = {}; for k = 1:length (node.args) arg_res = run_expander (node.args{k}, mode); - + if (! isempty (arg_res) && ! isempty (arg_res{1})) - args_str_parts{end+1} = arg_res{1}{1}; + args_str_parts{end+1} = arg_res{1}{1}; else args_str_parts{end+1} = ''; endif endfor - + full_term = sprintf ("%s(%s)", node.name, strjoin (args_str_parts, ',')); result = {{full_term}}; else @@ -1140,14 +1140,14 @@ ## process RHS rhs_tokens = run_lexer (rhs_str); [rhs_tree, ~] = run_parser (rhs_tokens); - + wrapper.type = 'OPERATOR'; wrapper.value = '~'; wrapper.left = []; wrapper.right = rhs_tree; - + expanded = run_expander (wrapper, mode); - + ## extract the terms. if (isstruct (expanded) && isfield (expanded, 'model')) rhs_terms = expanded.model; @@ -1164,22 +1164,22 @@ for i = 1:length (parts) p = strtrim (parts{i}); if (isempty (p)), continue; endif - + range_parts = strsplit (p, '-'); - + if (length (range_parts) == 2) s_str = strtrim (range_parts{1}); e_str = strtrim (range_parts{2}); - + [s_tok] = regexp (s_str, '^([a-zA-Z_]\w*)(\d+)$', 'tokens'); [e_tok] = regexp (e_str, '^([a-zA-Z_]\w*)(\d+)$', 'tokens'); - + if (! isempty (s_tok) && ! 
isempty (e_tok)) prefix = s_tok{1}{1}; s_num = str2double (s_tok{1}{2}); e_prefix = e_tok{1}{1}; e_num = str2double (e_tok{1}{2}); - + if (strcmp (prefix, e_prefix) && s_num <= e_num) for n = s_num:e_num vars{end+1} = sprintf ("%s%d", prefix, n); @@ -1203,7 +1203,7 @@ for i = 1:length (rhs_terms) t = rhs_terms{i}; if (isempty (t)) - term_strs{end+1} = ''; + term_strs{end+1} = ''; else if (length (t) == 1 && any (strfind (t{1}, "("))) term_strs{end+1} = t{1}; @@ -1229,13 +1229,13 @@ rhs_parts{end+1} = sprintf ("%s*%s", coeff, t_str); endif endfor - + full_rhs = strjoin (rhs_parts, ' + '); if (isempty (full_rhs)), full_rhs = '0'; endif lines{end+1} = sprintf ("%s = %s", lhs_vars{k}, full_rhs); endfor - eq_list = string (lines'); + eq_list = string (lines'); endfunction %!demo @@ -1271,7 +1271,7 @@ %!demo %! -%! ## Interaction Effects : +%! ## Interaction Effects : %! ## We analyze Relief Score based on Drug Type and Dosage Level. %! ## The '*' operator expands to the main effects PLUS the interaction term. %! ## Categorical variables are automatically created. @@ -1287,11 +1287,11 @@ %!demo %! -%! ## Polynomial Regression : +%! ## Polynomial Regression : %! ## Uses the power operator (^) to model non-linear relationships. %! Distance = [20; 45; 80; 125]; %! Speed = [30; 50; 70; 90]; -%! Speed_2 = Speed .^ 2; +%! Speed_2 = Speed .^ 2; %! t = table (Distance, Speed, Speed_2, 'VariableNames', {'Distance', 'Speed', 'Speed^2'}); %! %! formula = 'Distance ~ Speed^2'; @@ -1316,7 +1316,7 @@ %!demo %! -%! ## Explicit Nesting : +%! ## Explicit Nesting : %! ## The parser also supports the explicit 'B(A)' syntax, which means %! ## 'B is nested within A'. This is equivalent to the interaction 'A:B' %! ## but often used to denote random effects or specific hierarchy. @@ -1327,7 +1327,7 @@ %!demo %! -%! ## Excluding Terms : +%! ## Excluding Terms : %! ## Demonstrates building a complex model and then simplifying it. %! 
## We define a full 3-way interaction (A*B*C) but explicitly remove the %! ## three-way term (A:B:C) using the minus operator. @@ -1338,7 +1338,7 @@ %!demo %! -%! ## Repeated Measures : +%! ## Repeated Measures : %! ## This allows predicting multiple outcomes simultaneously. %! ## The range operator '-' selects all variables between 'T1' and 'T3' %! ## as the response matrix Y. From a04517d816654cd586f4c0a6c790d98630b66cba Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Sun, 22 Feb 2026 08:09:10 +0530 Subject: [PATCH 2/4] FIX - sorting left-to-right hierarchy --- inst/parseWilkinsonFormula.m | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/inst/parseWilkinsonFormula.m b/inst/parseWilkinsonFormula.m index f38382e2..8761495a 100644 --- a/inst/parseWilkinsonFormula.m +++ b/inst/parseWilkinsonFormula.m @@ -829,14 +829,20 @@ terms_mat(i, idx) = 1; endfor - ## sorting : order by order. + ## sorting : order by order (ascending), then by variable sequence (descending) term_orders = sum (terms_mat, 2); M = [term_orders, terms_mat]; + ## Create unique rows first [~, unique_idx] = unique (M, 'rows'); terms_mat = terms_mat (unique_idx, :); + M = M (unique_idx, :); - [~, sort_idx] = sortrows ([sum(terms_mat, 2), terms_mat]); + ## Create the direction vector: [1, -2, -3, -4, ...] + sort_dirs = [1, -(2:size(M, 2))]; + + ## Sort using the direction vector + [~, sort_idx] = sortrows (M, sort_dirs); schema.Terms = terms_mat (sort_idx, :); endfunction From f3da99b9351d2ccc3263cc18fb4ee5354f3b92ec Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Sun, 22 Feb 2026 08:11:32 +0530 Subject: [PATCH 3/4] relevant test --- inst/parseWilkinsonFormula.m | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/inst/parseWilkinsonFormula.m b/inst/parseWilkinsonFormula.m index 8761495a..3b066817 100644 --- a/inst/parseWilkinsonFormula.m +++ b/inst/parseWilkinsonFormula.m @@ -1665,6 +1665,24 @@ %! 
eq = parseWilkinsonFormula ('y ~ A - A', 'equation'); %! expected = string('y = c1'); %! assert (isequal (eq, expected)); +%!test +%! ## Verify parseWilkinsonFormula schema matches MATLAB fitlm sorting +%! formula = 'Y ~ x1 * x2 * x3'; +%! schema = parseWilkinsonFormula(formula, 'matrix'); +%! +%! ## Expected Octave Binary Matrix (8 rows, 4 columns) +%! ## Columns are sorted alphabetically by the parser: {'Y', 'x1', 'x2', 'x3'} +%! expected_terms = [0, 0, 0, 0; ## (Intercept) +%! 0, 1, 0, 0; ## x1 +%! 0, 0, 1, 0; ## x2 +%! 0, 0, 0, 1; ## x3 +%! 0, 1, 1, 0; ## x1:x2 +%! 0, 1, 0, 1; ## x1:x3 +%! 0, 0, 1, 1; ## x2:x3 +%! 0, 1, 1, 1]; ## x1:x2:x3 +%! +%! assert(schema.VariableNames, {'Y', 'x1', 'x2', 'x3'}); +%! assert(schema.Terms, expected_terms); %!error parseWilkinsonFormula () %!error parseWilkinsonFormula ('y ~ x', 'invalid_mode') %!error parseWilkinsonFormula ('', 'parse') From e472b92a4a6dd53d07bbbd8211d4182ea405a302 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Sun, 22 Feb 2026 09:34:43 +0530 Subject: [PATCH 4/4] contributor --- inst/parseWilkinsonFormula.m | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/parseWilkinsonFormula.m b/inst/parseWilkinsonFormula.m index 3b066817..d4563897 100644 --- a/inst/parseWilkinsonFormula.m +++ b/inst/parseWilkinsonFormula.m @@ -1,5 +1,6 @@ ## Copyright (C) 2026 Andreas Bertsatos ## Copyright (C) 2026 Avanish Salunke +## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com> ## ## This file is part of the statistics package for GNU Octave. ##