Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 19 additions & 14 deletions zeeguu/api/test/test_verbal_flashcards.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,16 +304,17 @@ def test_score_word_match_accepts_common_danish_asr_variants():


@pytest.mark.parametrize(
"user_word, expected_word",
"user_word, expected_word, expected_allowed_distance",
[
("hat", "kat"),
("hond", "hund"),
("pange", "penge"),
("hat", "kat", 1),
("hond", "hund", 1),
("pange", "penge", 2),
],
)
def test_score_word_match_accepts_one_optimal_string_alignment_edit(
def test_score_word_match_accepts_words_within_length_based_edit_budget(
user_word,
expected_word,
expected_allowed_distance,
):
from zeeguu.core.verbal_flashcards.fuzzy_match import score_word_match

Expand All @@ -322,33 +323,37 @@ def test_score_word_match_accepts_one_optimal_string_alignment_edit(
assert result["isMatch"] is True
assert result["matchType"] == "fuzzy"
assert result["optimalStringAlignmentDistance"] == 1
assert result["allowedOptimalStringAlignmentDistance"] == 1
assert result["allowedOptimalStringAlignmentDistance"] == expected_allowed_distance
assert result["jaroWinkler"] > 0


@pytest.mark.parametrize(
"user_word, expected_word",
"user_word, expected_word, expected_allowed_distance",
[
("hot", "kat"),
("hd", "hund"),
("pen", "penge"),
("hot", "kat", 1),
("zzzz", "hund", 1),
("xxxxx", "penge", 2),
],
)
def test_score_word_match_rejects_multiple_optimal_string_alignment_edits(
def test_score_word_match_rejects_words_outside_length_based_edit_budget(
user_word,
expected_word,
expected_allowed_distance,
):
from zeeguu.core.verbal_flashcards.fuzzy_match import score_word_match

result = score_word_match(user_word, expected_word, language_code="da")

assert result["isMatch"] is False
assert result["matchType"] == "close"
assert result["optimalStringAlignmentDistance"] > 1
assert result["allowedOptimalStringAlignmentDistance"] == 1
assert (
result["optimalStringAlignmentDistance"]
> result["allowedOptimalStringAlignmentDistance"]
)
assert result["allowedOptimalStringAlignmentDistance"] == expected_allowed_distance


def test_score_word_match_requires_exact_match_for_two_letter_words():
def test_score_word_match_allows_one_edit_for_two_letter_words():
from zeeguu.core.verbal_flashcards.fuzzy_match import score_word_match

result = score_word_match("og", "ok", language_code="da")
Expand Down
18 changes: 13 additions & 5 deletions zeeguu/core/verbal_flashcards/fuzzy_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,14 +152,22 @@ def allowed_optimal_string_alignment_distance(expected_word, language_code=None)
Return the maximum edit distance accepted for a spoken flashcard answer.
Acceptance is based on edit distance, not a blended similarity score:
after language-specific normalization, words of length >= 3 may differ by
one optimal string alignment edit. Jaro-Winkler is still returned as a
diagnostic signal for debugging and future analysis, but it does not decide
correctness.
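after language-specific normalization, longer words get a larger edit
budget because ASR approximations often drift more on longer Danish words.
The budget is keyed on the normalized length of the expected word:
length <= 2 allows 0 edits, 3-4 allows 1, 5-6 allows 2, 7-9 allows 3, and
10 or more allows 4.
Jaro-Winkler is still returned as a diagnostic signal for debugging and
future analysis, but it does not decide correctness.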
"""
normalizer = normalizer_for(language_code)
normalized_length = len(normalizer.canonical_form(expected_word))
return 0 if normalized_length <= 2 else 1
if normalized_length <= 2:
return 0
if normalized_length <= 4:
return 1
if normalized_length <= 6:
return 2
if normalized_length <= 9:
return 3
return 4


def fuzzy_match_threshold(expected_word, language_code=None):
Expand Down
Loading