From a9d72017812e9c1d984422d5ac84fb7e3611fafe Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Sun, 14 Jun 2026 14:48:40 +0530 Subject: [PATCH 1/2] Seed sinks for priority niche languages --- kb/kb_test.go | 25 ++++++++++++++++++ knowledge/clojure/language.toml | 46 ++++++++++++++++++++++++++++++++ knowledge/crystal/language.toml | 41 ++++++++++++++++++++++++++++ knowledge/d/language.toml | 38 ++++++++++++++++++++++++++ knowledge/erlang/language.toml | 47 +++++++++++++++++++++++++++++++++ knowledge/fsharp/language.toml | 41 ++++++++++++++++++++++++++++ knowledge/groovy/language.toml | 43 ++++++++++++++++++++++++++++++ knowledge/haskell/language.toml | 41 ++++++++++++++++++++++++++++ knowledge/julia/language.toml | 41 ++++++++++++++++++++++++++++ knowledge/nim/language.toml | 35 ++++++++++++++++++++++++ knowledge/ocaml/language.toml | 39 +++++++++++++++++++++++++++ knowledge/r/language.toml | 46 ++++++++++++++++++++++++++++++++ 12 files changed, 483 insertions(+) diff --git a/kb/kb_test.go b/kb/kb_test.go index a1057af..a5b612c 100644 --- a/kb/kb_test.go +++ b/kb/kb_test.go @@ -203,6 +203,31 @@ func TestRailsHasTaxonomy(t *testing.T) { } } +func TestPriorityNicheLanguagesHaveSinks(t *testing.T) { + base := loadKB(t) + for _, name := range []string{ + "Groovy", + "R", + "Julia", + "Haskell", + "OCaml", + "Nim", + "Crystal", + "F#", + "D", + "Erlang", + "Clojure", + } { + tool := base.ByName[name] + if tool == nil { + t.Fatalf("%s not found in KB", name) + } + if len(tool.Security.Sinks) == 0 { + t.Errorf("%s should have security sinks", name) + } + } +} + func TestTaxonomyTermsResolve(t *testing.T) { base := loadKB(t) valid := loadTaxonomyTerms(t) diff --git a/knowledge/clojure/language.toml b/knowledge/clojure/language.toml index e654f1e..cf2bc3b 100644 --- a/knowledge/clojure/language.toml +++ b/knowledge/clojure/language.toml @@ -12,3 +12,49 @@ ecosystems = ["clojure"] [taxonomy] role = ["language"] + +[[security.sinks]] +symbol = "eval" +threat = "code_injection" +cwe = "CWE-94" +note = "Evaluates Clojure forms" + +[[security.sinks]] +symbol = "load-string" +threat = "code_injection" +cwe = "CWE-94" +note = "Reads and evaluates forms from a string" + +[[security.sinks]] +symbol = "clojure.java.shell/sh" +threat = "command_injection" +cwe = "CWE-78" +note = "Dangerous when command or args are caller-controlled" + +[[security.sinks]] +symbol = "sh/sh" +threat = "command_injection" +cwe = "CWE-78" + +[[security.sinks]] +symbol = "read-string" +threat = "deserialization" +cwe = "CWE-502" +note = "Prefer clojure.edn/read-string for untrusted data" + +[[security.sinks]] +symbol = "clojure.core/read" +threat = "deserialization" +cwe = "CWE-502" + +[[security.sinks]] +symbol = "slurp" +threat = "path_traversal" +cwe = "CWE-22" +note = "When file path or URL is caller-controlled" + +[[security.sinks]] +symbol = "clojure.xml/parse" +threat = "xxe" +cwe = "CWE-611" +note = "XML parsing of untrusted input can resolve external entities" diff --git a/knowledge/crystal/language.toml b/knowledge/crystal/language.toml index 116ee59..fdf847a 100644 --- a/knowledge/crystal/language.toml +++ b/knowledge/crystal/language.toml @@ -12,3 +12,44 @@ ecosystems = ["crystal"] [taxonomy] role = ["language"] + +[[security.sinks]] +symbol = "system" +threat = "command_injection" +cwe = "CWE-78" +note = "Runs a command through the system shell" + +[[security.sinks]] +symbol = "Process.run" +threat = "command_injection" +cwe = "CWE-78" +note = "Dangerous when command or args are caller-controlled" + +[[security.sinks]] +symbol = "Process.new" +threat = "command_injection" +cwe = "CWE-78" + +[[security.sinks]] +symbol = "`" +threat = "command_injection" +cwe = "CWE-78" +note = "Backtick command execution" + +[[security.sinks]] +symbol = "run" +threat = "command_injection" +cwe = "CWE-78" +note = "Macro compile-time command execution" + +[[security.sinks]] +symbol = "File.read" +threat = "path_traversal" +cwe = "CWE-22" +note = "When file path is caller-controlled" + +[[security.sinks]] +symbol = "YAML.parse" +threat = "deserialization" +cwe = "CWE-502" +note = "Avoid parsing untrusted YAML into application objects" diff --git a/knowledge/d/language.toml b/knowledge/d/language.toml index 7f9c901..6ace573 100644 --- a/knowledge/d/language.toml +++ b/knowledge/d/language.toml @@ -12,3 +12,41 @@ ecosystems = ["d"] [taxonomy] role = ["language"] + +[[security.sinks]] +symbol = "std.process.execute" +threat = "command_injection" +cwe = "CWE-78" +note = "Dangerous when command or args are caller-controlled" + +[[security.sinks]] +symbol = "std.process.spawnProcess" +threat = "command_injection" +cwe = "CWE-78" + +[[security.sinks]] +symbol = "std.process.spawnShell" +threat = "command_injection" +cwe = "CWE-78" +note = "Runs a command through the system shell" + +[[security.sinks]] +symbol = "std.process.pipeShell" +threat = "command_injection" +cwe = "CWE-78" + +[[security.sinks]] +symbol = "core.stdc.stdlib.system" +threat = "command_injection" +cwe = "CWE-78" + +[[security.sinks]] +symbol = "std.file.read" +threat = "path_traversal" +cwe = "CWE-22" +note = "When file path is caller-controlled" + +[[security.sinks]] +symbol = "std.file.readText" +threat = "path_traversal" +cwe = "CWE-22" diff --git a/knowledge/erlang/language.toml b/knowledge/erlang/language.toml index 5626c97..6020443 100644 --- a/knowledge/erlang/language.toml +++ b/knowledge/erlang/language.toml @@ -12,3 +12,50 @@ ecosystems = ["erlang"] [taxonomy] role = ["language"] + +[[security.sinks]] +symbol = "os:cmd" +threat = "command_injection" +cwe = "CWE-78" +note = "Runs a command through the system shell" + +[[security.sinks]] +symbol = "open_port" +threat = "command_injection" +cwe = "CWE-78" +note = "With {spawn, Command} or caller-controlled executable" + +[[security.sinks]] +symbol = "erl_eval:exprs" +threat = "code_injection" +cwe = "CWE-94" +note = "Evaluates Erlang expressions" + +[[security.sinks]] +symbol = "erlang:binary_to_term" +threat = "deserialization" +cwe = "CWE-502" +note = "Use the safe option for untrusted binaries" + +[[security.sinks]] +symbol = "binary_to_term" +threat = "deserialization" +cwe = "CWE-502" + +[[security.sinks]] +symbol = "erlang:list_to_atom" +threat = "dos" +cwe = "CWE-400" +note = "Atoms are not garbage collected" + +[[security.sinks]] +symbol = "file:read_file" +threat = "path_traversal" +cwe = "CWE-22" +note = "When file path is caller-controlled" + +[[security.sinks]] +symbol = "httpc:request" +threat = "ssrf" +cwe = "CWE-918" +note = "When URL is caller-controlled" diff --git a/knowledge/fsharp/language.toml b/knowledge/fsharp/language.toml index b877c7f..982da50 100644 --- a/knowledge/fsharp/language.toml +++ b/knowledge/fsharp/language.toml @@ -12,3 +12,44 @@ ecosystems = ["fsharp"] [taxonomy] role = ["language"] + +[[security.sinks]] +symbol = "Process.Start" +threat = "command_injection" +cwe = "CWE-78" + +[[security.sinks]] +symbol = "ProcessStartInfo" +threat = "command_injection" +cwe = "CWE-78" +note = "When FileName or Arguments are caller-controlled" + +[[security.sinks]] +symbol = "BinaryFormatter.Deserialize" +threat = "deserialization" +cwe = "CWE-502" +note = "Obsolete formatter with gadget-chain risk" + +[[security.sinks]] +symbol = "JsonConvert.DeserializeObject" +threat = "deserialization" +cwe = "CWE-502" +note = "Newtonsoft with TypeNameHandling enabled" + +[[security.sinks]] +symbol = "Assembly.Load" +threat = "code_injection" +cwe = "CWE-470" +note = "When assembly name or bytes are caller-controlled" + +[[security.sinks]] +symbol = "Activator.CreateInstance" +threat = "unsafe_reflection" +cwe = "CWE-470" +note = "When type name is caller-controlled" + +[[security.sinks]] +symbol = "File.ReadAllText" +threat = "path_traversal" +cwe = "CWE-22" +note = "When file path is caller-controlled" diff --git a/knowledge/groovy/language.toml b/knowledge/groovy/language.toml index 0581de6..2ced0c6 100644 --- a/knowledge/groovy/language.toml +++ b/knowledge/groovy/language.toml @@ -12,3 +12,46 @@ ecosystems = ["groovy"] [taxonomy] role = ["language"] + +[[security.sinks]] +symbol = "GroovyShell.evaluate" +threat = "code_injection" +cwe = "CWE-94" +note = "Evaluates Groovy source; dangerous when script text is caller-controlled" + +[[security.sinks]] +symbol = "Eval.me" +threat = "code_injection" +cwe = "CWE-94" + +[[security.sinks]] +symbol = "Eval.x" +threat = "code_injection" +cwe = "CWE-94" + +[[security.sinks]] +symbol = "Eval.xy" +threat = "code_injection" +cwe = "CWE-94" + +[[security.sinks]] +symbol = "String.execute" +threat = "command_injection" +cwe = "CWE-78" +note = "Runs a shell command when the string is caller-controlled" + +[[security.sinks]] +symbol = "execute" +threat = "command_injection" +cwe = "CWE-78" + +[[security.sinks]] +symbol = "ObjectInputStream.readObject" +threat = "deserialization" +cwe = "CWE-502" + +[[security.sinks]] +symbol = "File.text" +threat = "path_traversal" +cwe = "CWE-22" +note = "When file path is caller-controlled" diff --git a/knowledge/haskell/language.toml b/knowledge/haskell/language.toml index da70f58..163909d 100644 --- a/knowledge/haskell/language.toml +++ b/knowledge/haskell/language.toml @@ -11,3 +11,44 @@ ecosystems = ["haskell"] [taxonomy] role = ["language"] + +[[security.sinks]] +symbol = "System.Process.callCommand" +threat = "command_injection" +cwe = "CWE-78" +note = "Runs a command through the system shell" + +[[security.sinks]] +symbol = "System.Process.readProcess" +threat = "command_injection" +cwe = "CWE-78" +note = "Dangerous when command or args are caller-controlled" + +[[security.sinks]] +symbol = "System.Process.createProcess" +threat = "command_injection" +cwe = "CWE-78" + +[[security.sinks]] +symbol = "System.Process.shell" +threat = "command_injection" +cwe = "CWE-78" +note = "Shell command constructor" + +[[security.sinks]] +symbol = "unsafePerformIO" +threat = "code_injection" +cwe = "CWE-94" +note = "Unsafe effect boundary; audit when exposed to dynamic evaluation or plugins" + +[[security.sinks]] +symbol = "Data.Binary.decode" +threat = "deserialization" +cwe = "CWE-502" +note = "Binary decoding of untrusted bytes can allocate or instantiate unexpected values" + +[[security.sinks]] +symbol = "readFile" +threat = "path_traversal" +cwe = "CWE-22" +note = "When file path is caller-controlled" diff --git a/knowledge/julia/language.toml b/knowledge/julia/language.toml index 43b5495..35e0253 100644 --- a/knowledge/julia/language.toml +++ b/knowledge/julia/language.toml @@ -12,3 +12,44 @@ ecosystems = ["julia"] [taxonomy] role = ["language"] + +[[security.sinks]] +symbol = "eval" +threat = "code_injection" +cwe = "CWE-94" +note = "Evaluates Julia expressions" + +[[security.sinks]] +symbol = "Meta.parse" +threat = "code_injection" +cwe = "CWE-94" +note = "Dangerous when parsed expressions are evaluated" + +[[security.sinks]] +symbol = "run" +threat = "command_injection" +cwe = "CWE-78" +note = "Executes external commands" + +[[security.sinks]] +symbol = "Cmd" +threat = "command_injection" +cwe = "CWE-78" +note = "Shell command construction with caller-controlled values" + +[[security.sinks]] +symbol = "include" +threat = "code_injection" +cwe = "CWE-94" +note = "Executes source from caller-controlled paths" + +[[security.sinks]] +symbol = "Serialization.deserialize" +threat = "deserialization" +cwe = "CWE-502" + +[[security.sinks]] +symbol = "open" +threat = "path_traversal" +cwe = "CWE-22" +note = "When file path is caller-controlled" diff --git a/knowledge/nim/language.toml b/knowledge/nim/language.toml index 800592b..295383e 100644 --- a/knowledge/nim/language.toml +++ b/knowledge/nim/language.toml @@ -12,3 +12,38 @@ ecosystems = ["nim"] [taxonomy] role = ["language"] + +[[security.sinks]] +symbol = "execShellCmd" +threat = "command_injection" +cwe = "CWE-78" +note = "Runs a command through the system shell" + +[[security.sinks]] +symbol = "execProcess" +threat = "command_injection" +cwe = "CWE-78" +note = "Dangerous when command or args are caller-controlled" + +[[security.sinks]] +symbol = "startProcess" +threat = "command_injection" +cwe = "CWE-78" + +[[security.sinks]] +symbol = "gorge" +threat = "command_injection" +cwe = "CWE-78" +note = "Compile-time shell execution" + +[[security.sinks]] +symbol = "staticExec" +threat = "command_injection" +cwe = "CWE-78" +note = "Compile-time shell execution" + +[[security.sinks]] +symbol = "readFile" +threat = "path_traversal" +cwe = "CWE-22" +note = "When file path is caller-controlled" diff --git a/knowledge/ocaml/language.toml b/knowledge/ocaml/language.toml index f02c0a3..c003cfd 100644 --- a/knowledge/ocaml/language.toml +++ b/knowledge/ocaml/language.toml @@ -12,3 +12,42 @@ ecosystems = ["ocaml"] [taxonomy] role = ["language"] + +[[security.sinks]] +symbol = "Sys.command" +threat = "command_injection" +cwe = "CWE-78" +note = "Runs a command through the system shell" + +[[security.sinks]] +symbol = "Unix.execvp" +threat = "command_injection" +cwe = "CWE-78" +note = "Dangerous when command or args are caller-controlled" + +[[security.sinks]] +symbol = "Unix.create_process" +threat = "command_injection" +cwe = "CWE-78" + +[[security.sinks]] +symbol = "Unix.open_process" +threat = "command_injection" +cwe = "CWE-78" + +[[security.sinks]] +symbol = "Marshal.from_channel" +threat = "deserialization" +cwe = "CWE-502" +note = "OCaml Marshal format is unsafe for untrusted data" + +[[security.sinks]] +symbol = "Marshal.from_string" +threat = "deserialization" +cwe = "CWE-502" + +[[security.sinks]] +symbol = "open_in" +threat = "path_traversal" +cwe = "CWE-22" +note = "When file path is caller-controlled" diff --git a/knowledge/r/language.toml b/knowledge/r/language.toml index 8ac6660..a0b82cd 100644 --- a/knowledge/r/language.toml +++ b/knowledge/r/language.toml @@ -11,3 +11,49 @@ ecosystems = ["r"] [taxonomy] role = ["language"] + +[[security.sinks]] +symbol = "system" +threat = "command_injection" +cwe = "CWE-78" +note = "Passes a string to the system shell" + +[[security.sinks]] +symbol = "system2" +threat = "command_injection" +cwe = "CWE-78" +note = "Command and args are dangerous when caller-controlled" + +[[security.sinks]] +symbol = "eval(parse())" +threat = "code_injection" +cwe = "CWE-94" +note = "Parses and evaluates R source from strings" + +[[security.sinks]] +symbol = "parse" +threat = "code_injection" +cwe = "CWE-94" +note = "Dangerous when paired with eval on caller-controlled text" + +[[security.sinks]] +symbol = "source" +threat = "code_injection" +cwe = "CWE-94" +note = "Executes R source from caller-controlled files or URLs" + +[[security.sinks]] +symbol = "unserialize" +threat = "deserialization" +cwe = "CWE-502" + +[[security.sinks]] +symbol = "readRDS" +threat = "deserialization" +cwe = "CWE-502" + +[[security.sinks]] +symbol = "readLines" +threat = "path_traversal" +cwe = "CWE-22" +note = "When path is caller-controlled" From f5d712487ab7ceb483de7708deafeb92439e5331 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Mon, 15 Jun 2026 14:21:15 +0530 Subject: [PATCH 2/2] Address sink review feedback --- knowledge/crystal/language.toml | 6 ------ knowledge/fsharp/language.toml | 2 +- knowledge/haskell/language.toml | 6 ------ 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/knowledge/crystal/language.toml b/knowledge/crystal/language.toml index fdf847a..0d62b19 100644 --- a/knowledge/crystal/language.toml +++ b/knowledge/crystal/language.toml @@ -47,9 +47,3 @@ symbol = "File.read" threat = "path_traversal" cwe = "CWE-22" note = "When file path is caller-controlled" - -[[security.sinks]] -symbol = "YAML.parse" -threat = "deserialization" -cwe = "CWE-502" -note = "Avoid parsing untrusted YAML into application objects" diff --git a/knowledge/fsharp/language.toml b/knowledge/fsharp/language.toml index 982da50..999da52 100644 --- a/knowledge/fsharp/language.toml +++ b/knowledge/fsharp/language.toml @@ -38,7 +38,7 @@ note = "Newtonsoft with TypeNameHandling enabled" [[security.sinks]] symbol = "Assembly.Load" -threat = "code_injection" +threat = "unsafe_reflection" cwe = "CWE-470" note = "When assembly name or bytes are caller-controlled" diff --git a/knowledge/haskell/language.toml b/knowledge/haskell/language.toml index 163909d..8958fd6 100644 --- a/knowledge/haskell/language.toml +++ b/knowledge/haskell/language.toml @@ -35,12 +35,6 @@ threat = "command_injection" cwe = "CWE-78" note = "Shell command constructor" -[[security.sinks]] -symbol = "unsafePerformIO" -threat = "code_injection" -cwe = "CWE-94" -note = "Unsafe effect boundary; audit when exposed to dynamic evaluation or plugins" - [[security.sinks]] symbol = "Data.Binary.decode" threat = "deserialization"