diff --git a/package.json b/package.json index b8684de..778f047 100644 --- a/package.json +++ b/package.json @@ -40,6 +40,7 @@ }, "homepage": "https://github.com/teekay/JamComments#readme", "dependencies": { + "@anthropic-ai/sdk": "^0.72.1", "@azure/functions": "3.2.0", "@azure/service-bus": "7.6.0", "@nestjs/common": "10.*", diff --git a/sql/migrations/5-add-llm-spam-check-settings.sql b/sql/migrations/5-add-llm-spam-check-settings.sql new file mode 100644 index 0000000..c5e0647 --- /dev/null +++ b/sql/migrations/5-add-llm-spam-check-settings.sql @@ -0,0 +1,3 @@ +ALTER TABLE account_settings ADD COLUMN use_llm_check BOOLEAN DEFAULT false; +ALTER TABLE account_settings ADD COLUMN llm_api_key VARCHAR(256); +ALTER TABLE account_settings ADD COLUMN llm_confidence_threshold DECIMAL DEFAULT 0.8; diff --git a/sql/sqlite/schema.sql b/sql/sqlite/schema.sql index 25ee095..2e041b4 100644 --- a/sql/sqlite/schema.sql +++ b/sql/sqlite/schema.sql @@ -17,7 +17,10 @@ CREATE TABLE IF NOT EXISTS account_settings ( blog_url TEXT, akismet_key TEXT, use_akismet INTEGER DEFAULT 0, - require_moderation INTEGER NOT NULL DEFAULT 0 + require_moderation INTEGER NOT NULL DEFAULT 0, + use_llm_check INTEGER DEFAULT 0, + llm_api_key TEXT, + llm_confidence_threshold REAL DEFAULT 0.8 ); -- Account email settings diff --git a/src/azure/accounts.module.ts b/src/azure/accounts.module.ts index 61ac9c0..ffa72da 100644 --- a/src/azure/accounts.module.ts +++ b/src/azure/accounts.module.ts @@ -1,5 +1,6 @@ import { AccountService } from '../shared/accounts/account.service' import { AkismetService } from '../shared/comments/akismet.service' +import { LlmSpamService } from '../shared/comments/llm-spam.service' import { AzureCommentsModule } from './comments.module' import { CryptoModule } from '../shared/crypto/crypto.module' import { forwardRef, Module } from '@nestjs/common' @@ -10,7 +11,7 @@ import { TokenService } from '../shared/accounts/token.service' @Module({ imports: [forwardRef(() => AzureCommentsModule), CryptoModule, PersistenceModule, PassportModule], controllers: [], - providers: [AccountService, TokenService, AkismetService], - exports: [AccountService, TokenService, AkismetService], + providers: [AccountService, TokenService, AkismetService, LlmSpamService], + exports: [AccountService, TokenService, AkismetService, LlmSpamService], }) export class AzureAccountsModule {} diff --git a/src/shared/accounts/account.module.ts b/src/shared/accounts/account.module.ts index 24040f6..f084584 100644 --- a/src/shared/accounts/account.module.ts +++ b/src/shared/accounts/account.module.ts @@ -1,5 +1,6 @@ import { AccountService } from './account.service' import { AkismetService } from '../comments/akismet.service' +import { LlmSpamService } from '../comments/llm-spam.service' import { CommentsModule } from '../comments/comments.module' import { CryptoModule } from '../crypto/crypto.module' import { forwardRef, Module } from '@nestjs/common' @@ -9,7 +10,7 @@ import { TokenService } from './token.service' @Module({ imports: [forwardRef(() => CommentsModule), CryptoModule, PassportModule], controllers: [], - providers: [AccountService, TokenService, AkismetService], - exports: [AccountService, TokenService, AkismetService], + providers: [AccountService, TokenService, AkismetService, LlmSpamService], + exports: [AccountService, TokenService, AkismetService, LlmSpamService], }) export class AccountsModule {} diff --git a/src/shared/accounts/accounts.queries.ts b/src/shared/accounts/accounts.queries.ts index 54361b7..4174ad7 100644 --- a/src/shared/accounts/accounts.queries.ts +++ b/src/shared/accounts/accounts.queries.ts @@ -368,6 +368,9 @@ export interface IAccountSettingsResult { id: string; require_moderation: boolean; use_akismet: boolean | null; + use_llm_check: boolean | null; + llm_api_key: string | null; + llm_confidence_threshold: string | null; } /** 'AccountSettings' query type */ @@ -424,6 +427,9 @@ export interface IUpdateSettingsParams { blogUrl?: string | null | void; requireModeration?: boolean | null | void; useAkismet?: boolean | null | void; + useLlmCheck?: boolean | null | void; + llmApiKey?: string | null | void; + llmConfidenceThreshold?: number | null | void; } /** 'UpdateSettings' return type */ @@ -435,12 +441,12 @@ export interface IUpdateSettingsQuery { result: IUpdateSettingsResult; } -const updateSettingsIR: any = {"usedParamSet":{"requireModeration":true,"blogUrl":true,"useAkismet":true,"akismetKey":true,"accountId":true},"params":[{"name":"requireModeration","required":false,"transform":{"type":"scalar"},"locs":[{"a":47,"b":64}]},{"name":"blogUrl","required":false,"transform":{"type":"scalar"},"locs":[{"a":76,"b":83}]},{"name":"useAkismet","required":false,"transform":{"type":"scalar"},"locs":[{"a":98,"b":108}]},{"name":"akismetKey","required":false,"transform":{"type":"scalar"},"locs":[{"a":123,"b":133}]},{"name":"accountId","required":false,"transform":{"type":"scalar"},"locs":[{"a":152,"b":161}]}],"statement":"UPDATE account_settings SET require_moderation=:requireModeration, blog_url=:blogUrl, use_akismet=:useAkismet, akismet_key=:akismetKey WHERE account_id=:accountId"}; +const updateSettingsIR: any = {"usedParamSet":{"requireModeration":true,"blogUrl":true,"useAkismet":true,"akismetKey":true,"useLlmCheck":true,"llmApiKey":true,"llmConfidenceThreshold":true,"accountId":true},"params":[{"name":"requireModeration","required":false,"transform":{"type":"scalar"},"locs":[{"a":47,"b":64}]},{"name":"blogUrl","required":false,"transform":{"type":"scalar"},"locs":[{"a":76,"b":83}]},{"name":"useAkismet","required":false,"transform":{"type":"scalar"},"locs":[{"a":98,"b":108}]},{"name":"akismetKey","required":false,"transform":{"type":"scalar"},"locs":[{"a":123,"b":133}]},{"name":"useLlmCheck","required":false,"transform":{"type":"scalar"},"locs":[{"a":150,"b":161}]},{"name":"llmApiKey","required":false,"transform":{"type":"scalar"},"locs":[{"a":177,"b":186}]},{"name":"llmConfidenceThreshold","required":false,"transform":{"type":"scalar"},"locs":[{"a":214,"b":236}]},{"name":"accountId","required":false,"transform":{"type":"scalar"},"locs":[{"a":255,"b":264}]}],"statement":"UPDATE account_settings SET require_moderation=:requireModeration, blog_url=:blogUrl, use_akismet=:useAkismet, akismet_key=:akismetKey, use_llm_check=:useLlmCheck, llm_api_key=:llmApiKey, llm_confidence_threshold=:llmConfidenceThreshold WHERE account_id=:accountId"}; /** * Query generated from SQL: * ``` - * UPDATE account_settings SET require_moderation=:requireModeration, blog_url=:blogUrl, use_akismet=:useAkismet, akismet_key=:akismetKey WHERE account_id=:accountId + * UPDATE account_settings SET require_moderation=:requireModeration, blog_url=:blogUrl, use_akismet=:useAkismet, akismet_key=:akismetKey, use_llm_check=:useLlmCheck, llm_api_key=:llmApiKey, llm_confidence_threshold=:llmConfidenceThreshold WHERE account_id=:accountId * ``` */ export const updateSettings = new PreparedQuery(updateSettingsIR); diff --git a/src/shared/accounts/accounts.sql b/src/shared/accounts/accounts.sql index d891df1..34a52a5 100644 --- a/src/shared/accounts/accounts.sql +++ b/src/shared/accounts/accounts.sql @@ -41,7 +41,7 @@ SELECT * FROM account_settings WHERE account_id=:accountId; SELECT * FROM account_email_settings WHERE account_id=:accountId; /* @name updateSettings */ -UPDATE account_settings SET require_moderation=:requireModeration, blog_url=:blogUrl, use_akismet=:useAkismet, akismet_key=:akismetKey WHERE account_id=:accountId; +UPDATE account_settings SET require_moderation=:requireModeration, blog_url=:blogUrl, use_akismet=:useAkismet, akismet_key=:akismetKey, use_llm_check=:useLlmCheck, llm_api_key=:llmApiKey, llm_confidence_threshold=:llmConfidenceThreshold WHERE account_id=:accountId; /* @name updateEmailSettings */ UPDATE account_email_settings SET notify_on_comments=:notifyOnComments, send_comments_digest=:sendCommentsDigest WHERE account_id=:accountId; diff --git a/src/shared/accounts/settings.param.ts b/src/shared/accounts/settings.param.ts index 0bbf5ef..7c9b855 100644 --- a/src/shared/accounts/settings.param.ts +++ b/src/shared/accounts/settings.param.ts @@ -3,6 +3,9 @@ export class SettingsParam { useAkismet = false akismetKey = '' blogUrl = '' + useLlmCheck = false + llmApiKey = '' + llmConfidenceThreshold = 0.8 } export class EmailSettingsParam { diff --git a/src/shared/accounts/views/settings.hbs b/src/shared/accounts/views/settings.hbs index fc19d89..9cf4a98 100644 --- a/src/shared/accounts/views/settings.hbs +++ b/src/shared/accounts/views/settings.hbs @@ -155,7 +155,7 @@
-

Anti-Spam
Anti-Spam (Akismet)
If you enable Akismet integration, we will check each comment to determine if it's SPAM or not relying on the Akismet SPAM detection feature.
@@ -163,14 +163,33 @@ Use anti-SPAM protection (Akismet) - +
+

+
+
+

Anti-Spam (LLM)
Use an LLM (Claude Haiku) as a second pass to detect spam. Comments flagged by the LLM with confidence above threshold will be sent to moderation. +
+

+ Use LLM spam detection + + + +
@@ -329,6 +348,15 @@ inputBlogUrl.removeAttribute('required'); } }); + document.getElementById('useLlmCheck').addEventListener('change', function(evt) { + let isOn = evt.target.checked; + let inputApiKey = document.getElementById('llmApiKey'); + if (isOn) { + inputApiKey.setAttribute('required', 'required'); + } else { + inputApiKey.removeAttribute('required'); + } + }); // remember tabs document.querySelectorAll('button.tablinks') diff --git a/src/shared/comments/comment.service.ts b/src/shared/comments/comment.service.ts index b8420e0..fa6c4a6 100644 --- a/src/shared/comments/comment.service.ts +++ b/src/shared/comments/comment.service.ts @@ -3,6 +3,7 @@ import moment from 'moment' import { Account } from '../accounts/account.interface' import { AccountService } from '../accounts/account.service' import { AkismetService } from './akismet.service' +import { LlmSpamService } from './llm-spam.service' import { Comment, CommentBase, CommentWithId } from './comment.interface' import { Inject, Injectable } from '@nestjs/common' import { Logger } from 'nestjs-pino' @@ -20,6 +21,7 @@ export class CommentService { @Inject(COMMENT_REPOSITORY) private commentRepo: ICommentRepository, private readonly accountService: AccountService, private readonly akismetService: AkismetService, + private readonly llmSpamService: LlmSpamService, private readonly logger: Logger ) {} @@ -27,16 +29,40 @@ export class CommentService { const settings = await this.accountService.settingsFor(account) const toModeration = settings?.requireModeration ?? false const payload = this.commentToDbParam(account, comment) - if (toModeration || (settings?.akismetKey && settings.useAkismet)) { - const flagIt = toModeration || (settings && (await this.akismetService.isCommentSpam(settings, comment, ip))) - if (flagIt) { - this.logger.warn(`${toModeration ? 'Moderation enforced' : 'SPAM detected'}: ${JSON.stringify(comment)}`) + + // Manual moderation takes precedence + if (toModeration) { + this.logger.warn(`Moderation enforced: ${JSON.stringify(comment)}`) + await this.commentRepo.createFlaggedComment(payload) + return CommentCreatedResult.Flagged + } + + // First pass: Akismet (if configured) + if (settings?.akismetKey && settings.useAkismet) { + const isAkismetSpam = await this.akismetService.isCommentSpam(settings, comment, ip) + if (isAkismetSpam) { + this.logger.warn(`SPAM detected (Akismet): ${JSON.stringify(comment)}`) await this.commentRepo.createFlaggedComment(payload) return CommentCreatedResult.Flagged } } - await this.commentRepo.createComment(payload) + // Second pass: LLM check (if configured) + if (settings?.llmApiKey && settings.useLlmCheck) { + const llmResult = await this.llmSpamService.checkComment(settings, comment) + if (llmResult) { + const threshold = settings.llmConfidenceThreshold ?? 0.8 + if (llmResult.is_spam && llmResult.confidence >= threshold) { + this.logger.warn( + `SPAM detected (LLM, confidence: ${llmResult.confidence}): ${JSON.stringify(comment)}` + ) + await this.commentRepo.createFlaggedComment(payload) + return CommentCreatedResult.Flagged + } + } + } + + await this.commentRepo.createComment(payload) return payload.id } diff --git a/src/shared/comments/llm-spam.service.ts b/src/shared/comments/llm-spam.service.ts new file mode 100644 index 0000000..28dec9a --- /dev/null +++ b/src/shared/comments/llm-spam.service.ts @@ -0,0 +1,76 @@ +import Anthropic from '@anthropic-ai/sdk' +import { CommentBase } from './comment.interface' +import { Injectable } from '@nestjs/common' +import { Logger } from 'nestjs-pino' +import { SettingsParam } from '../accounts/settings.param' + +export interface LlmSpamResult { + is_spam: boolean + confidence: number +} + +@Injectable() +export class LlmSpamService { + constructor(private readonly logger: Logger) {} + + async checkComment( + accountSettings: SettingsParam, + comment: CommentBase + ): Promise { + const apiKey = accountSettings.llmApiKey + if (!apiKey) return + + const client = new Anthropic({ apiKey }) + + const prompt = `You are a spam detection system. Analyze the following comment and determine if it is spam. + +Comment author: ${comment.author.name} +Comment author email: ${comment.author.email || 'not provided'} +Comment author website: ${comment.author.website || 'not provided'} +Comment text: ${comment.text} +Page URL: ${comment.postUrl} + +Respond with ONLY a JSON object in this exact format (no markdown, no explanation): +{"is_spam": true or false, "confidence": 0.0 to 1.0} + +Consider these spam indicators: +- Promotional content or advertisements +- Links to suspicious websites +- Generic or irrelevant content +- Excessive use of keywords +- Poor grammar typical of automated spam +- Mentions of money, gambling, adult content, or pharmaceuticals + +Consider these legitimate comment indicators: +- Relevant to the page content +- Personal opinions or questions +- Natural language patterns +- Engagement with the topic` + + try { + const response = await client.messages.create({ + model: 'claude-haiku-4-20250514', + max_tokens: 100, + messages: [{ role: 'user', content: prompt }], + }) + + const textContent = response.content.find((c) => c.type === 'text') + if (!textContent || textContent.type !== 'text') { + this.logger.warn('LLM spam check returned no text content') + return + } + + const result = JSON.parse(textContent.text) as LlmSpamResult + if (typeof result.is_spam !== 'boolean' || typeof result.confidence !== 'number') { + this.logger.warn(`LLM spam check returned invalid format: ${textContent.text}`) + return + } + + this.logger.debug(`LLM spam check result: ${JSON.stringify(result)}`) + return result + } catch (error) { + this.logger.warn(`Could not reach LLM API: ${(error as Error)?.message}`) + return + } + } +} diff --git a/src/shared/repositories/postgres/postgres-account.repository.ts b/src/shared/repositories/postgres/postgres-account.repository.ts index 48c28b8..727e249 100644 --- a/src/shared/repositories/postgres/postgres-account.repository.ts +++ b/src/shared/repositories/postgres/postgres-account.repository.ts @@ -82,6 +82,9 @@ export class PostgresAccountRepository implements IAccountRepository { useAkismet: s[0].use_akismet ?? false, akismetKey: s[0].akismet_key ?? '', blogUrl: s[0].blog_url ?? '', + useLlmCheck: s[0].use_llm_check ?? false, + llmApiKey: s[0].llm_api_key ?? '', + llmConfidenceThreshold: parseFloat(s[0].llm_confidence_threshold ?? '0.8'), } } @@ -102,6 +105,9 @@ export class PostgresAccountRepository implements IAccountRepository { useAkismet: settings.useAkismet ?? false, akismetKey: settings.akismetKey, blogUrl: settings.blogUrl, + useLlmCheck: settings.useLlmCheck ?? false, + llmApiKey: settings.llmApiKey, + llmConfidenceThreshold: settings.llmConfidenceThreshold ?? 0.8, }, this.client ) diff --git a/src/shared/repositories/sqlite/sqlite-account.repository.ts b/src/shared/repositories/sqlite/sqlite-account.repository.ts index 52091cf..10012e0 100644 --- a/src/shared/repositories/sqlite/sqlite-account.repository.ts +++ b/src/shared/repositories/sqlite/sqlite-account.repository.ts @@ -80,6 +80,9 @@ export class SqliteAccountRepository implements IAccountRepository { useAkismet: Boolean(row.use_akismet), akismetKey: row.akismet_key ?? '', blogUrl: row.blog_url ?? '', + useLlmCheck: Boolean(row.use_llm_check), + llmApiKey: row.llm_api_key ?? '', + llmConfidenceThreshold: row.llm_confidence_threshold ?? 0.8, } } @@ -96,7 +99,8 @@ export class SqliteAccountRepository implements IAccountRepository { async updateSettings(accountId: string, settings: SettingsParam): Promise { const stmt = this.db.prepare(` UPDATE account_settings - SET require_moderation = ?, blog_url = ?, use_akismet = ?, akismet_key = ? + SET require_moderation = ?, blog_url = ?, use_akismet = ?, akismet_key = ?, + use_llm_check = ?, llm_api_key = ?, llm_confidence_threshold = ? WHERE account_id = ? `) stmt.run( @@ -104,6 +108,9 @@ export class SqliteAccountRepository implements IAccountRepository { settings.blogUrl, settings.useAkismet ? 1 : 0, settings.akismetKey, + settings.useLlmCheck ? 1 : 0, + settings.llmApiKey, + settings.llmConfidenceThreshold ?? 0.8, accountId ) } @@ -157,6 +164,9 @@ interface SqliteSettingsRow { akismet_key: string | null use_akismet: number | null require_moderation: number + use_llm_check: number | null + llm_api_key: string | null + llm_confidence_threshold: number | null } interface SqliteEmailSettingsRow { diff --git a/yarn.lock b/yarn.lock index 98e592d..ad6f3cf 100644 --- a/yarn.lock +++ b/yarn.lock @@ -45,6 +45,13 @@ ora "5.4.1" rxjs "7.8.1" +"@anthropic-ai/sdk@^0.72.1": + version "0.72.1" + resolved "https://registry.yarnpkg.com/@anthropic-ai/sdk/-/sdk-0.72.1.tgz#822c46649a1af64df72ec9fa2b8cd852c9795285" + integrity sha512-MiUnue7qN7DvLIoYHgkedN2z05mRf2CutBzjXXY2krzOhG2r/rIfISS2uVkNLikgToB5hYIzw+xp2jdOtRkqYQ== + dependencies: + json-schema-to-ts "^3.1.1" + "@azure/abort-controller@^1.0.0": version "1.1.0" resolved "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-1.1.0.tgz" @@ -440,6 +447,11 @@ dependencies: "@babel/helper-plugin-utils" "^7.24.7" +"@babel/runtime@^7.18.3": + version "7.28.6" + resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.28.6.tgz#d267a43cb1836dc4d182cce93ae75ba954ef6d2b" + integrity sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA== + "@babel/template@^7.24.7", "@babel/template@^7.3.3": version "7.24.7" resolved "https://registry.npmjs.org/@babel/template/-/template-7.24.7.tgz" @@ -5763,6 +5775,14 @@ json-parse-even-better-errors@^2.3.0, json-parse-even-better-errors@^2.3.1: resolved "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz" integrity sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w== +json-schema-to-ts@^3.1.1: + version "3.1.1" + resolved "https://registry.yarnpkg.com/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz#81f3acaf5a34736492f6f5f51870ef9ece1ca853" + integrity sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g== + dependencies: + "@babel/runtime" "^7.18.3" + ts-algebra "^2.0.0" + json-schema-to-typescript@13.*: version "13.1.2" resolved "https://registry.npmjs.org/json-schema-to-typescript/-/json-schema-to-typescript-13.1.2.tgz" @@ -8639,6 +8659,11 @@ trim-newlines@^3.0.0: resolved "https://registry.npmjs.org/true-case-path/-/true-case-path-2.2.1.tgz" integrity sha512-0z3j8R7MCjy10kc/g+qg7Ln3alJTodw9aDuVWZa3uiWqfuBMKeAeP2ocWcxoyM3D73yz3Jt/Pu4qPr4wHSdB/Q== +ts-algebra@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/ts-algebra/-/ts-algebra-2.0.0.tgz#4e3e0953878f26518fce7f6bb115064a65388b7a" + integrity sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw== + ts-api-utils@^1.3.0: version "1.3.0" resolved "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-1.3.0.tgz"