Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,38 @@
# Changelog for `distributors`

## 0.6.0.0 - 2026-04-24

### New Module

- Added `Control.Lens.Grammar.Machine` as the transducer/matching runtime layer.

### New Types

- Added `Transducer` and `TransducerStep` as the finite-state representation for
compiled grammar machines.

### New APIs (Machine Runtime)

- Added `transducer` to compile `Bnf (RegEx token)` into `Transducer`.
- Added `parseForest` to reconstruct parse forests with rule labels and token spans/slices,
returning the remaining unparsed suffix.
- Added `expectNext` to compute scanner-frontier expected token classes after a prefix.
- Added `languageSample` to lazily enumerate sampled language words from shortest length upward.
- Added `unreachableRules` to report dead nonterminals unreachable from the start expression.

### Internal Machinery

- Implemented Thompson-style transducer construction over `RegEx`-extended BNF.
- Implemented Earley-style chart runtime (`initialChart`, `closeChartAt`, `scanClassOptions`,
`prefixGen`) with predict/complete closure and scanner grouping.
- Added completion-time caller indexing/cache optimizations and precomputed rule nullability/first-state
indexing to speed machine execution.

### Grammar Integration

- `Control.Lens.Grammar` now exposes machine-backed generators:
`transducerG` and the parse-forest examples/docs built on the Machine runtime.

## 0.5.0.0 - 2026-04-16

### Changes
Expand Down
10 changes: 9 additions & 1 deletion distributors.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cabal-version: 2.2
-- see: https://github.com/sol/hpack

name: distributors
version: 0.5.0.0
version: 0.6.0.0
synopsis: Unifying Parsers, Printers & Grammars
description: Distributors provides mathematically inspired abstractions for coders to write parsers that can also be inverted to printers.
category: Profunctors, Optics, Parsing
Expand Down Expand Up @@ -36,6 +36,7 @@ library
Control.Lens.Grammar.Internal.NestedPrismTH
Control.Lens.Grammar.Internal.Orphanage
Control.Lens.Grammar.Kleene
Control.Lens.Grammar.Machine
Control.Lens.Grammar.Symbol
Control.Lens.Grammar.Token
Control.Lens.Grate
Expand Down Expand Up @@ -98,6 +99,7 @@ library
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints
build-depends:
MemoTrie >=0.6 && <1
, QuickCheck >=2.14 && <3
, adjunctions >=4.4 && <5
, base >=4.15 && <5
, bifunctors >=5.5 && <6
Expand All @@ -106,12 +108,15 @@ library
, contravariant >=1.5 && <2
, distributive >=0.6 && <1
, lens >=5.0 && <6
, monad-loops >=0.4.3 && <1
, mtl >=2.2 && <3
, profunctors >=5.6 && <6
, random >=1.2 && <2
, tagged >=0.8 && <1
, template-haskell >=2.17 && <3
, text ==2.*
, th-abstraction >=0.4 && <1
, transformers >=0.5 && <1
, vector >=0.12 && <1
, witherable >=0.4 && <1
default-language: Haskell2010
Expand Down Expand Up @@ -188,12 +193,15 @@ test-suite test
, hspec >=2.7 && <3
, lens >=5.0 && <6
, megaparsec >=9.0 && <10
, monad-loops >=0.4.3 && <1
, mtl >=2.2 && <3
, profunctors >=5.6 && <6
, random >=1.2 && <2
, tagged >=0.8 && <1
, template-haskell >=2.17 && <3
, text ==2.*
, th-abstraction >=0.4 && <1
, transformers >=0.5 && <1
, vector >=0.12 && <1
, witherable >=0.4 && <1
default-language: Haskell2010
6 changes: 5 additions & 1 deletion package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: distributors
version: 0.5.0.0
version: 0.6.0.0
github: "morphismtech/distributors"
license: BSD-3-Clause
author: "Eitan Chatav"
Expand Down Expand Up @@ -27,7 +27,11 @@ dependencies:
- distributive >= 0.6 && < 1
- lens >= 5.0 && < 6
- MemoTrie >= 0.6 && < 1
- QuickCheck >= 2.14 && < 3
- monad-loops >=0.4.3 && < 1
- mtl >= 2.2 && < 3
- random >= 1.2 && < 2
- transformers >= 0.5 && < 1
- profunctors >= 5.6 && < 6
- tagged >= 0.8 && < 1
- template-haskell >= 2.17 && < 3
Expand Down
68 changes: 68 additions & 0 deletions src/Control/Lens/Grammar.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ module Control.Lens.Grammar
, regbnfG
, regbnfGrammar
, applicativeG
, transducerG
-- * Context-sensitive grammar
, CtxGrammar
, printG
Expand All @@ -45,6 +46,7 @@ import Control.Lens.PartialIso
import Control.Lens.Grammar.BackusNaur
import Control.Lens.Grammar.Boole
import Control.Lens.Grammar.Kleene
import Control.Lens.Grammar.Machine
import Control.Lens.Grammar.Token
import Control.Lens.Grammar.Symbol
import Data.Bifunctor.Joker
Expand All @@ -67,6 +69,7 @@ import Witherable
import Control.Lens.Grammar.BackusNaur as X
import Control.Lens.Grammar.Boole as X
import Control.Lens.Grammar.Kleene as X
import Control.Lens.Grammar.Machine as X
import Control.Lens.Grammar.Symbol as X
import Control.Lens.Grammar.Token as X
import Control.Lens.PartialIso as X
Expand Down Expand Up @@ -789,6 +792,71 @@ It can apply to a `RegGrammar`.
regbnfG :: Grammar Char a -> RegBnf
regbnfG bnf = runGrammor bnf

{- | Compile a `Grammar` into a `Transducer`.

>>> let regexMachine = transducerG @Char regexGrammar

A transducer is a form of finite state machine,
usable as an intermediary for further generators like
`=~`, `expectNext`, `languageSample`, `parseForest` & `unreachableRules`.

>>> import Test.QuickCheck
>>> let regexLang = languageSample @Char regexMachine
>>> words100 <- generate (take 100 <$> regexLang)
>>> quickCheck (property (all (=~ regexMachine) words100))
+++ OK, passed 1 test.
>>> import Control.Monad.State
>>> import System.Random
>>> let gen = mkStdGen 69
>>> evalState (take 15 <$> regexLang) gen
["","|","\776269","()","[]","\\[","||","|\249908","\770923*","\1008821+","\318904?","\845807|","\477898\1026934","()*","()+"]

>>> import Data.Tree (drawForest)

@>>> let (forest, _) = parseForest regexMachine "xy|z" in putStr (drawForest (map (fmap show) forest))
("regex",0,4,"xy|z")
|
`- ("alternate",0,4,"xy|z")
|
+- ("sequence",0,2,"xy")
| |
| +- ("expression",0,1,"x")
| | |
| | `- ("atom",0,1,"x")
| | |
| | `- ("class",0,1,"x")
| | |
| | `- ("class-one-of",0,1,"x")
| | |
| | `- ("char",0,1,"x")
| |
| `- ("expression",1,2,"y")
| |
| `- ("atom",1,2,"y")
| |
| `- ("class",1,2,"y")
| |
| `- ("class-one-of",1,2,"y")
| |
| `- ("char",1,2,"y")
|
`- ("sequence",3,4,"z")
|
`- ("expression",3,4,"z")
|
`- ("atom",3,4,"z")
|
`- ("class",3,4,"z")
|
`- ("class-one-of",3,4,"z")
|
`- ("char",3,4,"z")
@

-}
transducerG :: Categorized token => Grammar token a -> Transducer token
transducerG bnf = transducer (runGrammor bnf)

{- | `printG` generates a printer from a `CtxGrammar`.
Since both `RegGrammar`s and context-free `Grammar`s are `CtxGrammar`s,
the type system will allow `printG` to be applied to them.
Expand Down
16 changes: 0 additions & 16 deletions src/Control/Lens/Grammar/BackusNaur.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,10 @@ module Control.Lens.Grammar.BackusNaur
, liftBnf0
, liftBnf1
, liftBnf2
-- * Matching
, Matching (..)
, diffB
) where

import Control.Lens
import Control.Lens.Extras
import Control.Lens.Grammar.Kleene
import Control.Lens.Grammar.Token
import Control.Lens.Grammar.Symbol
Expand Down Expand Up @@ -107,11 +104,6 @@ liftBnf2
liftBnf2 f (Bnf start0 rules0) (Bnf start1 rules1) =
Bnf (f start0 start1) (Set.map coerce rules0 <> Set.map coerce rules1)

-- | Does a word match a pattern?
class Matching word pattern | pattern -> word where
(=~) :: word -> pattern -> Bool
infix 2 =~

{- |
The [Brzozowski derivative]
(https://dl.acm.org/doi/pdf/10.1145/321239.321249) of a
Expand Down Expand Up @@ -210,11 +202,3 @@ instance (Ord rule, Monoid rule) => Monoid (Bnf rule) where
mempty = liftBnf0 mempty
instance (Ord rule, Semigroup rule) => Semigroup (Bnf rule) where
(<>) = liftBnf2 (<>)
instance (Categorized token, HasTrie token)
=> Matching [token] (Bnf (RegEx token)) where
(=~) word = δ . diffB word
instance (Categorized token, HasTrie token)
=> Matching [token] (RegEx token) where
word =~ pattern = word =~ liftBnf0 pattern
instance Matching s (APrism s t a b) where
word =~ pattern = is pattern word
25 changes: 25 additions & 0 deletions src/Control/Lens/Grammar/Kleene.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ import Control.Applicative
import Control.Lens.Grammar.Boole
import Control.Lens.Grammar.Symbol
import Control.Lens.Grammar.Token
import Control.Monad.Loops
import Control.Monad.State (StateT, state)
import Data.Bifunctor.Joker
import Data.Foldable
import Data.MemoTrie
Expand All @@ -38,6 +40,10 @@ import Data.Profunctor.Distributor
import Data.Set (Set)
import qualified Data.Set as Set
import GHC.Generics
import System.Random (RandomGen, Random, random)
import Test.QuickCheck.Arbitrary
import Test.QuickCheck.Gen (Gen)
import qualified Test.QuickCheck.Gen as Gen
import Text.ParserCombinators.ReadP (ReadP)
import qualified Text.ParserCombinators.ReadP as ReadP

Expand Down Expand Up @@ -225,6 +231,25 @@ instance TokenAlgebra token (f token)
tokenClass = Joker . tokenClass
instance TokenAlgebra Char (ReadP Char) where
tokenClass = ReadP.satisfy . tokenClass
instance (Categorized token, Arbitrary token) => TokenAlgebra token (Gen token) where
tokenClass (TokenClass exam) = case exam of
OneOf xs -> oneOf xs
NotOneOf xs (AndAsIn cat) -> arbitrary `Gen.suchThat`
(\x -> x `notElem` xs && categorize x == cat)
NotOneOf xs (AndNotAsIn cats) -> arbitrary `Gen.suchThat`
(\x -> x `notElem` xs && categorize x `notElem` cats)
Alternate cls1 cls2 -> Gen.oneof [tokenClass cls1, tokenClass cls2]
instance (RandomGen g, Monad m, Categorized token, Random token)
=> TokenAlgebra token (StateT g m token) where
tokenClass (TokenClass exam) = case exam of
OneOf xs -> oneOf xs
NotOneOf xs (AndAsIn cat) ->
iterateUntil (\x -> x `notElem` xs && categorize x == cat) anyToken
NotOneOf xs (AndNotAsIn cats) ->
iterateUntil (\x -> x `notElem` xs && categorize x `notElem` cats) anyToken
Alternate cls1 cls2 -> do
b <- state random
if (b :: Bool) then tokenClass cls1 else tokenClass cls2
instance Categorized token => Monoid (RegEx token) where
mempty = SeqEmpty
instance Categorized token => Semigroup (RegEx token) where
Expand Down
Loading
Loading