morphismtech · echatav · Apr 25, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,38 @@
 # Changelog for `distributors`
 
+## 0.6.0.0 - 2026-04-24
+
+### New Module
+
+- Added `Control.Lens.Grammar.Machine` as the transducer/matching runtime layer.
+
+### New Types
+
+- Added `Transducer` and `TransducerStep` as the finite-state representation for
+  compiled grammar machines.
+
+### New APIs (Machine Runtime)
+
+- Added `transducer` to compile `Bnf (RegEx token)` into `Transducer`.
+- Added `parseForest` to reconstruct parse forests with rule labels and token spans/slices,
+  returning the remaining unparsed suffix.
+- Added `expectNext` to compute scanner-frontier expected token classes after a prefix.
+- Added `languageSample` to lazily enumerate sampled language words from shortest length upward.
+- Added `unreachableRules` to report dead nonterminals unreachable from the start expression.
+
+### Internal Machinery
+
+- Implemented Thompson-style transducer construction over `RegEx`-extended BNF.
+- Implemented Earley-style chart runtime (`initialChart`, `closeChartAt`, `scanClassOptions`,
+  `prefixGen`) with predict/complete closure and scanner grouping.
+- Added completion-time caller indexing/cache optimizations and precomputed rule nullability/first-state
+  indexing to speed machine execution.
+
+### Grammar Integration
+
+- `Control.Lens.Grammar` now exposes machine-backed generators:
+  `transducerG` and the parse-forest examples/docs built on the Machine runtime.
+
 ## 0.5.0.0 - 2026-04-16
 
 ### Changes

diff --git a/distributors.cabal b/distributors.cabal
@@ -5,7 +5,7 @@ cabal-version: 2.2
 -- see: https://github.com/sol/hpack
 
 name:           distributors
-version:        0.5.0.0
+version:        0.6.0.0
 synopsis:       Unifying Parsers, Printers & Grammars
 description:    Distributors provides mathematically inspired abstractions for coders to write parsers that can also be inverted to printers.
 category:       Profunctors, Optics, Parsing
@@ -36,6 +36,7 @@ library
       Control.Lens.Grammar.Internal.NestedPrismTH
       Control.Lens.Grammar.Internal.Orphanage
       Control.Lens.Grammar.Kleene
+      Control.Lens.Grammar.Machine
       Control.Lens.Grammar.Symbol
       Control.Lens.Grammar.Token
       Control.Lens.Grate
@@ -98,6 +99,7 @@ library
   ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints
   build-depends:
       MemoTrie >=0.6 && <1
+    , QuickCheck >=2.14 && <3
     , adjunctions >=4.4 && <5
     , base >=4.15 && <5
     , bifunctors >=5.5 && <6
@@ -106,12 +108,15 @@ library
     , contravariant >=1.5 && <2
     , distributive >=0.6 && <1
     , lens >=5.0 && <6
+    , monad-loops >=0.4.3 && <1
     , mtl >=2.2 && <3
     , profunctors >=5.6 && <6
+    , random >=1.2 && <2
     , tagged >=0.8 && <1
     , template-haskell >=2.17 && <3
     , text ==2.*
     , th-abstraction >=0.4 && <1
+    , transformers >=0.5 && <1
     , vector >=0.12 && <1
     , witherable >=0.4 && <1
   default-language: Haskell2010
@@ -188,12 +193,15 @@ test-suite test
     , hspec >=2.7 && <3
     , lens >=5.0 && <6
     , megaparsec >=9.0 && <10
+    , monad-loops >=0.4.3 && <1
     , mtl >=2.2 && <3
     , profunctors >=5.6 && <6
+    , random >=1.2 && <2
     , tagged >=0.8 && <1
     , template-haskell >=2.17 && <3
     , text ==2.*
     , th-abstraction >=0.4 && <1
+    , transformers >=0.5 && <1
     , vector >=0.12 && <1
     , witherable >=0.4 && <1
   default-language: Haskell2010
diff --git a/package.yaml b/package.yaml
@@ -1,5 +1,5 @@
 name: distributors
-version: 0.5.0.0
+version: 0.6.0.0
 github: "morphismtech/distributors"
 license: BSD-3-Clause
 author: "Eitan Chatav"
@@ -27,7 +27,11 @@ dependencies:
 - distributive >= 0.6 && < 1
 - lens >= 5.0 && < 6
 - MemoTrie >= 0.6 && < 1
+- QuickCheck >= 2.14 && < 3
+- monad-loops >=0.4.3 && < 1
 - mtl >= 2.2 && < 3
+- random >= 1.2 && < 2
+- transformers >= 0.5 && < 1
 - profunctors >= 5.6 && < 6
 - tagged >= 0.8 && < 1
 - template-haskell >= 2.17 && < 3

diff --git a/src/Control/Lens/Grammar.hs b/src/Control/Lens/Grammar.hs
@@ -24,6 +24,7 @@ module Control.Lens.Grammar
   , regbnfG
   , regbnfGrammar
   , applicativeG
+  , transducerG
     -- * Context-sensitive grammar
   , CtxGrammar
   , printG
@@ -45,6 +46,7 @@ import Control.Lens.PartialIso
 import Control.Lens.Grammar.BackusNaur
 import Control.Lens.Grammar.Boole
 import Control.Lens.Grammar.Kleene
+import Control.Lens.Grammar.Machine
 import Control.Lens.Grammar.Token
 import Control.Lens.Grammar.Symbol
 import Data.Bifunctor.Joker
@@ -67,6 +69,7 @@ import Witherable
 import Control.Lens.Grammar.BackusNaur as X
 import Control.Lens.Grammar.Boole as X
 import Control.Lens.Grammar.Kleene as X
+import Control.Lens.Grammar.Machine as X
 import Control.Lens.Grammar.Symbol as X
 import Control.Lens.Grammar.Token as X
 import Control.Lens.PartialIso as X
@@ -789,6 +792,71 @@ It can apply to a `RegGrammar`.
 regbnfG :: Grammar Char a -> RegBnf
 regbnfG bnf = runGrammor bnf
 
+{- | Compile a `Grammar` into a `Transducer`.
+
+>>> let regexMachine = transducerG @Char regexGrammar
+
+A transducer is a form of finite state machine,
+usable as an intermediary for further generators like
+`=~`, `expectNext`, `languageSample`, `parseForest` & `unreachableRules`.
+
+>>> import Test.QuickCheck
+>>> let regexLang = languageSample @Char regexMachine
+>>> words100 <- generate (take 100 <$> regexLang)
+>>> quickCheck (property (all (=~ regexMachine) words100))
++++ OK, passed 1 test.
+>>> import Control.Monad.State
+>>> import System.Random
+>>> let gen = mkStdGen 69
+>>> evalState (take 15 <$> regexLang) gen
+["","|","\776269","()","[]","\\[","||","|\249908","\770923*","\1008821+","\318904?","\845807|","\477898\1026934","()*","()+"]
+
+>>> import Data.Tree (drawForest)
+
+@>>> let (forest, _) = parseForest regexMachine "xy|z" in putStr (drawForest (map (fmap show) forest))
+("regex",0,4,"xy|z")
+|
+`- ("alternate",0,4,"xy|z")
+   |
+   +- ("sequence",0,2,"xy")
+   |  |
+   |  +- ("expression",0,1,"x")
+   |  |  |
+   |  |  `- ("atom",0,1,"x")
+   |  |     |
+   |  |     `- ("class",0,1,"x")
+   |  |        |
+   |  |        `- ("class-one-of",0,1,"x")
+   |  |           |
+   |  |           `- ("char",0,1,"x")
+   |  |
+   |  `- ("expression",1,2,"y")
+   |     |
+   |     `- ("atom",1,2,"y")
+   |        |
+   |        `- ("class",1,2,"y")
+   |           |
+   |           `- ("class-one-of",1,2,"y")
+   |              |
+   |              `- ("char",1,2,"y")
+   |
+   `- ("sequence",3,4,"z")
+      |
+      `- ("expression",3,4,"z")
+         |
+         `- ("atom",3,4,"z")
+            |
+            `- ("class",3,4,"z")
+               |
+               `- ("class-one-of",3,4,"z")
+                  |
+                  `- ("char",3,4,"z")
+@
+
+-}
+transducerG :: Categorized token => Grammar token a -> Transducer token
+transducerG bnf = transducer (runGrammor bnf)
+
 {- | `printG` generates a printer from a `CtxGrammar`.
 Since both `RegGrammar`s and context-free `Grammar`s are `CtxGrammar`s,
 the type system will allow `printG` to be applied to them.

diff --git a/src/Control/Lens/Grammar/BackusNaur.hs b/src/Control/Lens/Grammar/BackusNaur.hs
@@ -19,13 +19,10 @@ module Control.Lens.Grammar.BackusNaur
   , liftBnf0
   , liftBnf1
   , liftBnf2
-    -- * Matching
-  , Matching (..)
   , diffB
   ) where
 
 import Control.Lens
-import Control.Lens.Extras
 import Control.Lens.Grammar.Kleene
 import Control.Lens.Grammar.Token
 import Control.Lens.Grammar.Symbol
@@ -107,11 +104,6 @@ liftBnf2
 liftBnf2 f (Bnf start0 rules0) (Bnf start1 rules1) =
   Bnf (f start0 start1) (Set.map coerce rules0 <> Set.map coerce rules1)
 
--- | Does a word match a pattern?
-class Matching word pattern | pattern -> word where
-  (=~) :: word -> pattern -> Bool
-  infix 2 =~
-
 {- |
 The [Brzozowski derivative]
 (https://dl.acm.org/doi/pdf/10.1145/321239.321249) of a
@@ -210,11 +202,3 @@ instance (Ord rule, Monoid rule) => Monoid (Bnf rule) where
   mempty = liftBnf0 mempty
 instance (Ord rule, Semigroup rule) => Semigroup (Bnf rule) where
   (<>) = liftBnf2 (<>)
-instance (Categorized token, HasTrie token)
-  => Matching [token] (Bnf (RegEx token)) where
-    (=~) word = δ . diffB word
-instance (Categorized token, HasTrie token)
-  => Matching [token] (RegEx token) where
-    word =~ pattern = word =~ liftBnf0 pattern
-instance Matching s (APrism s t a b) where
-  word =~ pattern = is pattern word
diff --git a/src/Control/Lens/Grammar/Kleene.hs b/src/Control/Lens/Grammar/Kleene.hs
@@ -29,6 +29,8 @@ import Control.Applicative
 import Control.Lens.Grammar.Boole
 import Control.Lens.Grammar.Symbol
 import Control.Lens.Grammar.Token
+import Control.Monad.Loops
+import Control.Monad.State (StateT, state)
 import Data.Bifunctor.Joker
 import Data.Foldable
 import Data.MemoTrie
@@ -38,6 +40,10 @@ import Data.Profunctor.Distributor
 import Data.Set (Set)
 import qualified Data.Set as Set
 import GHC.Generics
+import System.Random (RandomGen, Random, random)
+import Test.QuickCheck.Arbitrary
+import Test.QuickCheck.Gen (Gen)
+import qualified Test.QuickCheck.Gen as Gen
 import Text.ParserCombinators.ReadP (ReadP)
 import qualified Text.ParserCombinators.ReadP as ReadP
 
@@ -225,6 +231,25 @@ instance TokenAlgebra token (f token)
     tokenClass = Joker . tokenClass
 instance TokenAlgebra Char (ReadP Char) where
   tokenClass = ReadP.satisfy . tokenClass
+instance (Categorized token, Arbitrary token) => TokenAlgebra token (Gen token) where
+  tokenClass (TokenClass exam) = case exam of
+    OneOf xs -> oneOf xs
+    NotOneOf xs (AndAsIn cat) -> arbitrary `Gen.suchThat`
+      (\x -> x `notElem` xs && categorize x == cat)
+    NotOneOf xs (AndNotAsIn cats) -> arbitrary `Gen.suchThat`
+      (\x -> x `notElem` xs && categorize x `notElem` cats)
+    Alternate cls1 cls2 -> Gen.oneof [tokenClass cls1, tokenClass cls2]
+instance (RandomGen g, Monad m, Categorized token, Random token)
+  => TokenAlgebra token (StateT g m token) where
+  tokenClass (TokenClass exam) = case exam of
+    OneOf xs -> oneOf xs
+    NotOneOf xs (AndAsIn cat) ->
+      iterateUntil (\x -> x `notElem` xs && categorize x == cat) anyToken
+    NotOneOf xs (AndNotAsIn cats) ->
+      iterateUntil (\x -> x `notElem` xs && categorize x `notElem` cats) anyToken
+    Alternate cls1 cls2 -> do
+      b <- state random
+      if (b :: Bool) then tokenClass cls1 else tokenClass cls2
 instance Categorized token => Monoid (RegEx token) where
   mempty = SeqEmpty
 instance Categorized token => Semigroup (RegEx token) where