diff --git a/bot/bot.py b/bot/bot.py index 2272cce..8df2f56 100644 --- a/bot/bot.py +++ b/bot/bot.py @@ -16,7 +16,7 @@ _DATA_DIR = pathlib.Path(__file__).parent.parent / "data" _WORD_LIST_FILES: dict[Difficulty, pathlib.Path] = { Difficulty.EASY: _DATA_DIR / "interest_words_f.txt", - Difficulty.MEDIUM: _DATA_DIR / "interest_words_d.txt", + Difficulty.MEDIUM: _DATA_DIR / "interest_words_m.txt", Difficulty.HARD: _DATA_DIR / "interest_words_d.txt", } @@ -30,7 +30,7 @@ "solution — reveal the answer (broadcaster only) | " "setprefix — change prefix (mod/broadcaster) | " "setcooldown — change cooldown (mod/broadcaster) | " - "setdifficulty — set difficulty for next game (mod/broadcaster)" + "setdifficulty — set difficulty for next game (mod/broadcaster)" ) _MAX_PREFIX_LEN = 10 @@ -64,13 +64,8 @@ def _validate_cooldown(value: str) -> str | None: def _validate_difficulty(value: str | None) -> str | None: - """Return an error message string if *value* is not a valid setdifficulty value, else ``None``. - - Note: ``setdifficulty`` only accepts ``easy`` and ``hard``. ``medium`` is - intentionally excluded here; it remains accessible via the ``start`` command - for backwards compatibility. - """ - valid = {Difficulty.EASY.value, Difficulty.HARD.value} + """Return an error message string if *value* is not a valid setdifficulty value, else ``None``.""" + valid = {d.value for d in Difficulty} if not value or value.lower() not in valid: return f"difficulty must be one of: {', '.join(sorted(valid))}" return None @@ -159,6 +154,16 @@ async def start_game(self, ctx: commands.Context, difficulty: str = "") -> None: await ctx.send("Word list is empty. Cannot start game.") return + scorer = self._game_state.scorer + if scorer is not None: + words = [w for w in words if scorer.is_in_vocab(w)] + if not words: + await ctx.send( + "No playable words found for this difficulty " + "(all words are out of vocabulary). Check the word list." + ) + return + target = random.choice(words) self._game_state.start_new_game(target, diff) await ctx.send( @@ -306,7 +311,7 @@ async def hint(self, ctx: commands.Context) -> None: return parts = [ - f"{i + 1}. {e.raw_word} ({math.floor((e.score or 0.0) * 100)}%)" + f"{i + 1}. {e.raw_word} ({math.floor(e.score * 100)}%)" for i, e in enumerate(top) ] await ctx.send("Top guesses: " + " | ".join(parts)) @@ -333,7 +338,7 @@ async def status(self, ctx: commands.Context) -> None: top = self._game_state.top_guesses(1) if top: best = top[0] - pct = math.floor((best.score or 0.0) * 100) + pct = math.floor(best.score * 100) await ctx.send( f"Game in progress. {attempts} attempt(s). " f"Best guess: '{best.raw_word}' ({pct}%)." @@ -347,7 +352,7 @@ async def status(self, ctx: commands.Context) -> None: async def setdifficulty(self, ctx: commands.Context, difficulty: str = "") -> None: """Change the difficulty for the next game (moderators and broadcaster only). - Usage: setdifficulty + Usage: setdifficulty Does not affect the current game. The change is applied immediately but is not persisted; it resets when the bot restarts. diff --git a/data/interest_words_m.txt b/data/interest_words_m.txt new file mode 100644 index 0000000..d56c732 --- /dev/null +++ b/data/interest_words_m.txt @@ -0,0 +1,39 @@ +# French interest words - moyen (medium) difficulty +# One word per line, no accents required for lookup (clean_word normalises) +# Semi-concrete French nouns: familiar to all speakers but requiring +# more lateral thinking than easy words. Zero overlap with EASY or HARD lists. +# All words are high-frequency in frWac (no OOV risk). +âge +aile +bruit +branche +désert +fenêtre +fête +feu +force +fumée +graine +guerre +île +image +joie +lac +liberté +miroir +mort +mur +musique +nuage +ombre +peur +pierre +plage +regard +rêve +roi +sable +silence +vague +voix +voyage diff --git a/game/engine.py b/game/engine.py index bdfbf0b..48d22cf 100644 --- a/game/engine.py +++ b/game/engine.py @@ -58,6 +58,25 @@ def is_loaded(self) -> bool: # Public API # ------------------------------------------------------------------ + def is_in_vocab(self, word: str) -> bool: + """Return ``True`` if *word* is present in the model vocabulary. + + The word is cleaned/normalised before lookup, matching the same + pre-processing applied by :meth:`score_guess`. + + Args: + word: The word to check. + + Returns: + ``True`` if the cleaned form of *word* maps to a vocabulary key. + + Raises: + RuntimeError: If the model has not been loaded yet. + """ + if self._model is None: + raise RuntimeError("Model not loaded. Call load() first.") + return self._cleaned_key_map.get(clean_word(word)) is not None + def similarity(self, word_a: str, word_b: str) -> float | None: """Return the cosine similarity between two words. diff --git a/game/state.py b/game/state.py index aa2fcdf..f13cbb9 100644 --- a/game/state.py +++ b/game/state.py @@ -28,6 +28,10 @@ def score_guess(self, guess: str, target: str) -> float | None: """Return a similarity score in ``[0, 1]``, or ``None`` if unknown.""" ... + def is_in_vocab(self, word: str) -> bool: + """Return ``True`` if *word* is present in the model vocabulary.""" + ... + @dataclass class GuessEntry: @@ -82,6 +86,11 @@ def __init__(self, scorer: Scorer | None = None) -> None: self._history: list[GuessEntry] = [] self._is_found: bool = False + @property + def scorer(self) -> Scorer | None: + """Return the configured scorer, or ``None`` if none was provided.""" + return self._scorer + # ------------------------------------------------------------------ # Game lifecycle # ------------------------------------------------------------------ @@ -209,4 +218,4 @@ def top_guesses(self, n: int = 10) -> list[GuessEntry]: A list of at most *n* :class:`GuessEntry` objects. """ scored = [e for e in self._history if e.score is not None] - return sorted(scored, key=lambda e: e.score or 0.0, reverse=True)[:n] + return sorted(scored, key=lambda e: e.score, reverse=True)[:n] diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py index 8b1e4f5..be449a8 100644 --- a/tests/unit/test_commands.py +++ b/tests/unit/test_commands.py @@ -670,7 +670,7 @@ async def test_hard_difficulty_accepted(self): async def test_medium_difficulty_accepted(self): bot = make_bot() ctx = make_ctx(is_broadcaster=True) - with patch("random.choice", return_value="ambiguïté"): + with patch("random.choice", return_value="rêve"): await _start_fn(bot, ctx, "medium") assert bot._game_state.difficulty == Difficulty.MEDIUM @@ -725,6 +725,55 @@ async def test_start_confirmation_message_contains_prefix(self): assert "!sx" in message +# --------------------------------------------------------------------------- +# OOV filtering in start_game +# --------------------------------------------------------------------------- + +class _OovAwareScorer: + """Scorer that returns None for words not in the valid set (simulates OOV).""" + + def __init__(self, valid_words: set[str]) -> None: + self._valid = valid_words + + def score_guess(self, guess: str, target: str) -> float | None: + from game.word_utils import clean_word + if clean_word(guess) in self._valid: + return 0.5 + return None + + def is_in_vocab(self, word: str) -> bool: + from game.word_utils import clean_word + return clean_word(word) in self._valid + + +@pytest.mark.asyncio +class TestStartOovFiltering: + async def test_all_words_oov_sends_error_and_aborts(self): + bot = make_bot() + bot._game_state = GameState(scorer=_OovAwareScorer(set())) + ctx = make_ctx(is_broadcaster=True) + with patch("bot.bot.load_word_list", return_value=["chat", "licorne", "dragon"]): + await _start_fn(bot, ctx) + message = ctx.send.call_args[0][0] + assert "out of vocabulary" in message.lower() + assert bot._game_state.target_word is None + + async def test_partial_oov_starts_game_with_valid_word(self): + bot = make_bot() + bot._game_state = GameState(scorer=_OovAwareScorer({"chat", "dragon"})) + ctx = make_ctx(is_broadcaster=True) + with patch("bot.bot.load_word_list", return_value=["chat", "licorne", "dragon"]): + await _start_fn(bot, ctx) + assert bot._game_state.target_word in {"chat", "dragon"} + + async def test_no_scorer_skips_oov_filter(self): + bot = make_bot() + ctx = make_ctx(is_broadcaster=True) + with patch("bot.bot.load_word_list", return_value=["chat", "licorne", "dragon"]): + await _start_fn(bot, ctx) + assert bot._game_state.target_word in {"chat", "licorne", "dragon"} + + # --------------------------------------------------------------------------- # hint command # --------------------------------------------------------------------------- @@ -843,8 +892,8 @@ def test_easy_is_valid(self): def test_hard_is_valid(self): assert _validate_difficulty("hard") is None - def test_medium_is_invalid(self): - assert _validate_difficulty("medium") is not None + def test_medium_is_valid(self): + assert _validate_difficulty("medium") is None def test_empty_is_invalid(self): assert _validate_difficulty("") is not None @@ -891,7 +940,7 @@ class TestSetdifficultyValidation: async def test_invalid_difficulty_rejected(self): bot = make_bot() ctx = make_ctx(is_mod=True) - await _setdifficulty_fn(bot, ctx, "medium") + await _setdifficulty_fn(bot, ctx, "extreme") assert bot._next_difficulty == Difficulty.EASY # unchanged ctx.send.assert_called_once() assert "invalid" in ctx.send.call_args[0][0].lower() @@ -916,6 +965,13 @@ async def test_valid_hard_accepted(self): assert bot._next_difficulty == Difficulty.HARD ctx.send.assert_called_once() + async def test_valid_medium_accepted(self): + bot = make_bot() + ctx = make_ctx(is_mod=True) + await _setdifficulty_fn(bot, ctx, "medium") + assert bot._next_difficulty == Difficulty.MEDIUM + ctx.send.assert_called_once() + async def test_confirmation_message_contains_difficulty(self): bot = make_bot() ctx = make_ctx(is_mod=True) diff --git a/tests/unit/test_engine.py b/tests/unit/test_engine.py index 4a972a8..f4ef3cf 100644 --- a/tests/unit/test_engine.py +++ b/tests/unit/test_engine.py @@ -59,6 +59,30 @@ def test_score_guess_raises_when_not_loaded(self): with pytest.raises(RuntimeError, match="not loaded"): engine.score_guess("chat", "chien") + def test_is_in_vocab_raises_when_not_loaded(self): + engine = SemanticEngine(model_path="/nonexistent/path.bin") + with pytest.raises(RuntimeError, match="not loaded"): + engine.is_in_vocab("chat") + + +# --------------------------------------------------------------------------- +# SemanticEngine – is_in_vocab +# --------------------------------------------------------------------------- + +class TestSemanticEngineIsInVocab: + def test_known_word_returns_true(self): + engine = _make_engine() + assert engine.is_in_vocab("chat") is True + + def test_unknown_word_returns_false(self): + engine = _make_engine() + assert engine.is_in_vocab("licorne") is False + + def test_all_vocabulary_words_are_in_vocab(self): + engine = _make_engine() + for word in ["chat", "chien", "maison", "voiture"]: + assert engine.is_in_vocab(word) is True + # --------------------------------------------------------------------------- # SemanticEngine – similarity