diff --git a/cpp/pom.xml b/cpp/pom.xml new file mode 100644 index 000000000..b16687f8e --- /dev/null +++ b/cpp/pom.xml @@ -0,0 +1,149 @@ + + + + 4.0.0 + + + com.ibm + sonar-cryptography + 1.5.1-SNAPSHOT + ../pom.xml + + + sonar-cryptography-cpp + Sonar Cryptography Plugin :: C/C++ + + C and C++ language support for the Sonar Cryptography Plugin, + with detection rules for OpenSSL and other cryptographic libraries. + + + + 17 + 17 + + + + + + + com.ibm + sonar-cryptography-common + ${project.version} + + + + com.ibm + sonar-cryptography-engine + ${project.version} + + + + com.ibm + sonar-cryptography-mapper + ${project.version} + + + + com.ibm + sonar-cryptography-enricher + ${project.version} + + + + com.ibm + sonar-cryptography-output + ${project.version} + + + + + org.sonarsource.api.plugin + sonar-plugin-api + provided + + + + + org.antlr + antlr4-runtime + + + + + com.google.code.findbugs + jsr305 + + + + + org.slf4j + slf4j-api + + + + + org.junit.jupiter + junit-jupiter + test + + + + org.assertj + assertj-core + test + + + + org.mockito + mockito-core + test + + + + org.sonarsource.api.plugin + sonar-plugin-api-test-fixtures + test + + + + + + + + + + com.diffplug.spotless + spotless-maven-plugin + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + + +
diff --git a/engine/src/main/antlr4/com/ibm/engine/language/csharp/antlr/CLexer.g4 b/engine/src/main/antlr4/com/ibm/engine/language/csharp/antlr/CLexer.g4
new file mode 100644
index 000000000..581c03cad
--- /dev/null
+++ b/engine/src/main/antlr4/com/ibm/engine/language/csharp/antlr/CLexer.g4
@@ -0,0 +1,365 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * C Lexer Grammar for CBOMkit sonar-cryptography plugin.
+ * Adapted from the ANTLR4 grammars-v4 C grammar:
+ * https://github.com/antlr/grammars-v4/tree/master/c
+ *
+ * Supports C89, C99, C11 constructs sufficient for detecting
+ * OpenSSL and other cryptographic library API calls.
+ */
+lexer grammar CLexer;
+
+// ---------------------------------------------------------------------------
+// Keywords
+// ---------------------------------------------------------------------------
+
+Auto : 'auto';
+Break : 'break';
+Case : 'case';
+Char : 'char';
+Const : 'const';
+Continue : 'continue';
+Default : 'default';
+Do : 'do';
+Double : 'double';
+Else : 'else';
+Enum : 'enum';
+Extern : 'extern';
+Float : 'float';
+For : 'for';
+Goto : 'goto';
+If : 'if';
+Inline : 'inline';
+Int : 'int';
+Long : 'long';
+Register : 'register';
+Restrict : 'restrict';
+Return : 'return';
+Short : 'short';
+Signed : 'signed';
+Sizeof : 'sizeof';
+Static : 'static';
+Struct : 'struct';
+Switch : 'switch';
+Typedef : 'typedef';
+Union : 'union';
+Unsigned : 'unsigned';
+Void : 'void';
+Volatile : 'volatile';
+While : 'while';
+
+// C11 keywords
+Alignas : '_Alignas';
+Alignof : '_Alignof';
+Atomic : '_Atomic';
+Bool : '_Bool';
+Complex : '_Complex';
+Generic : '_Generic';
+Imaginary : '_Imaginary';
+Noreturn : '_Noreturn';
+StaticAssert : '_Static_assert';
+ThreadLocal : '_Thread_local';
+
+// GCC extensions (common in OpenSSL code)
+BuiltinVaArg : '__builtin_va_arg';
+BuiltinOffsetof : '__builtin_offsetof';
+// Both typeof spellings are referenced as string literals by CParser.g4. A split parser
+// grammar cannot create implicit tokens, so each literal needs its own lexer rule here.
+TypeofUnderscored : '__typeof__';
+Typeof : '__typeof';
+
+// ---------------------------------------------------------------------------
+// Punctuators and operators
+// ---------------------------------------------------------------------------
+
+LeftParen : '(';
+RightParen : ')';
+LeftBracket : '[';
+RightBracket : ']';
+LeftBrace : '{';
+RightBrace : '}';
+
+Less : '<';
+LessEqual : '<=';
+Greater : '>';
+GreaterEqual : '>=';
+LeftShift : '<<';
+RightShift : '>>';
+
+Plus : '+';
+PlusPlus : '++';
+Minus : '-';
+MinusMinus : '--';
+Star : '*';
+Div : '/';
+Mod : '%';
+
+And : '&';
+Or : '|';
+AndAnd : '&&';
+OrOr : '||';
+Caret : '^';
+Not : '!';
+Tilde : '~';
+
+Question : '?';
+Colon : ':';
+Semi : ';';
+Comma : ',';
+Assign : '=';
+
+// Compound assignment operators
+StarAssign : '*=';
+DivAssign : '/=';
+ModAssign : '%=';
+PlusAssign : '+=';
+MinusAssign : '-=';
+LeftShiftAssign : '<<=';
+RightShiftAssign: '>>=';
+AndAssign : '&=';
+XorAssign : '^=';
+OrAssign : '|=';
+
+Equal : '==';
+NotEqual : '!=';
+
+Arrow : '->';
+Dot : '.';
+Ellipsis : '...';
+
+// ---------------------------------------------------------------------------
+// Literals
+// ---------------------------------------------------------------------------
+
+IntegerConstant
+    : DecimalConstant IntegerSuffix?
+    | OctalConstant IntegerSuffix?
+    | HexadecimalConstant IntegerSuffix?
+    | BinaryConstant
+    ;
+
+fragment BinaryConstant
+    : '0' [bB] [0-1]+
+    ;
+
+fragment DecimalConstant
+    : NonzeroDigit Digit*
+    ;
+
+fragment OctalConstant
+    : '0' OctalDigit*
+    ;
+
+fragment HexadecimalConstant
+    : HexadecimalPrefix HexadecimalDigit+
+    ;
+
+fragment HexadecimalPrefix
+    : '0' [xX]
+    ;
+
+fragment IntegerSuffix
+    : UnsignedSuffix LongSuffix?
+    | UnsignedSuffix LongLongSuffix
+    | LongSuffix UnsignedSuffix?
+    | LongLongSuffix UnsignedSuffix?
+    ;
+
+fragment UnsignedSuffix
+    : [uU]
+    ;
+
+fragment LongSuffix
+    : [lL]
+    ;
+
+fragment LongLongSuffix
+    : 'll' | 'LL'
+    ;
+
+FloatingConstant
+    : DecimalFloatingConstant
+    | HexadecimalFloatingConstant
+    ;
+
+fragment DecimalFloatingConstant
+    : FractionalConstant ExponentPart? FloatingSuffix?
+    | DigitSequence ExponentPart FloatingSuffix?
+    ;
+
+fragment HexadecimalFloatingConstant
+    : HexadecimalPrefix (HexadecimalFractionalConstant | HexadecimalDigitSequence) BinaryExponentPart FloatingSuffix?
+    ;
+
+fragment FractionalConstant
+    : DigitSequence? '.' DigitSequence
+    | DigitSequence '.'
+    ;
+
+fragment ExponentPart
+    : [eE] Sign? DigitSequence
+    ;
+
+fragment Sign
+    : [+-]
+    ;
+
+fragment DigitSequence
+    : Digit+
+    ;
+
+fragment HexadecimalFractionalConstant
+    : HexadecimalDigitSequence? '.' HexadecimalDigitSequence
+    | HexadecimalDigitSequence '.'
+    ;
+
+fragment BinaryExponentPart
+    : [pP] Sign? DigitSequence
+    ;
+
+fragment HexadecimalDigitSequence
+    : HexadecimalDigit+
+    ;
+
+fragment FloatingSuffix
+    : [flFL]
+    ;
+
+CharacterConstant
+    : '\'' CCharSequence '\''
+    | 'L\'' CCharSequence '\''
+    | 'u\'' CCharSequence '\''
+    | 'U\'' CCharSequence '\''
+    ;
+
+fragment CCharSequence
+    : CChar+
+    ;
+
+fragment CChar
+    : ~['\\\r\n]
+    | EscapeSequence
+    ;
+
+StringLiteral
+    : EncodingPrefix? '"' SCharSequence? '"'
+    ;
+
+fragment EncodingPrefix
+    : 'u8' | 'u' | 'U' | 'L'
+    ;
+
+fragment SCharSequence
+    : SChar+
+    ;
+
+fragment SChar
+    : ~["\\\r\n]
+    | EscapeSequence
+    | '\\\n' // line continuation inside a string literal (LF line ending)
+    | '\\\r\n' // line continuation inside a string literal (CRLF line ending)
+    ;
+
+fragment EscapeSequence
+    : SimpleEscapeSequence
+    | OctalEscapeSequence
+    | HexadecimalEscapeSequence
+    | UniversalCharacterName
+    ;
+
+fragment SimpleEscapeSequence
+    : '\\' ['"?abfnrtv\\]
+    ;
+
+fragment OctalEscapeSequence
+    : '\\' OctalDigit OctalDigit? OctalDigit?
+    ;
+
+fragment HexadecimalEscapeSequence
+    : '\\x' HexadecimalDigit+
+    ;
+
+// ---------------------------------------------------------------------------
+// Identifiers
+// ---------------------------------------------------------------------------
+
+Identifier
+    : IdentifierNondigit (IdentifierNondigit | Digit)*
+    ;
+
+fragment IdentifierNondigit
+    : Nondigit
+    | UniversalCharacterName
+    ;
+
+fragment Nondigit
+    : [a-zA-Z_]
+    ;
+
+fragment Digit
+    : [0-9]
+    ;
+
+fragment NonzeroDigit
+    : [1-9]
+    ;
+
+fragment OctalDigit
+    : [0-7]
+    ;
+
+fragment HexadecimalDigit
+    : [0-9a-fA-F]
+    ;
+
+fragment UniversalCharacterName
+    : '\\u' HexadecimalDigit HexadecimalDigit HexadecimalDigit HexadecimalDigit
+    | '\\U' HexadecimalDigit HexadecimalDigit HexadecimalDigit HexadecimalDigit
+            HexadecimalDigit HexadecimalDigit HexadecimalDigit HexadecimalDigit
+    ;
+
+// ---------------------------------------------------------------------------
+// Preprocessor directives (skip — we do not expand macros).
+// A backslash immediately before a line break continues the directive, so a
+// multi-line #define is skipped as a whole instead of leaking its body as tokens.
+// ---------------------------------------------------------------------------
+
+Directive
+    : '#' ( ~[\\\r\n] | '\\' ( '\r' '\n'? | '\n' | ~[\r\n] )? )* -> skip
+    ;
+
+// ---------------------------------------------------------------------------
+// Whitespace and comments
+// ---------------------------------------------------------------------------
+
+Whitespace
+    : [ \t]+ -> skip
+    ;
+
+Newline
+    : ( '\r' '\n'? | '\n' ) -> skip
+    ;
+
+BlockComment
+    : '/*' .*? '*/' -> skip
+    ;
+
+LineComment
+    : '//' ~[\r\n]* -> skip
+    ;
diff --git a/engine/src/main/antlr4/com/ibm/engine/language/csharp/antlr/CParser.g4 b/engine/src/main/antlr4/com/ibm/engine/language/csharp/antlr/CParser.g4
new file mode 100644
index 000000000..41866acf8
--- /dev/null
+++ b/engine/src/main/antlr4/com/ibm/engine/language/csharp/antlr/CParser.g4
@@ -0,0 +1,494 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * C Parser Grammar for CBOMkit sonar-cryptography plugin.
+ * Adapted from the ANTLR4 grammars-v4 C grammar:
+ * https://github.com/antlr/grammars-v4/tree/master/c
+ *
+ * Supports C89, C99, C11 constructs sufficient for detecting
+ * OpenSSL and other cryptographic library API calls.
+ *
+ * Key design goals:
+ * - Detect function calls: EVP_EncryptInit_ex(ctx, EVP_aes_256_cbc(), ...)
+ * - Detect string literals used as algorithm identifiers
+ * - Detect integer constants used as key sizes
+ * - Handle pointer dereferences and struct member access
+ */
+parser grammar CParser;
+
+options {
+    tokenVocab = CLexer;
+}
+
+// ---------------------------------------------------------------------------
+// Top-level rule
+// ---------------------------------------------------------------------------
+
+compilationUnit
+    : translationUnit? EOF
+    ;
+
+translationUnit
+    : externalDeclaration+
+    ;
+
+externalDeclaration
+    : functionDefinition
+    | declaration
+    | Semi // stray semicolons
+    ;
+
+// ---------------------------------------------------------------------------
+// Function definitions
+// ---------------------------------------------------------------------------
+
+functionDefinition
+    : declarationSpecifiers declarator declarationList? compoundStatement
+    ;
+
+declarationList
+    : declaration+
+    ;
+
+// ---------------------------------------------------------------------------
+// Declarations
+// ---------------------------------------------------------------------------
+
+declaration
+    : declarationSpecifiers initDeclaratorList? Semi
+    | staticAssertDeclaration
+    ;
+
+declarationSpecifiers
+    : declarationSpecifier+
+    ;
+
+declarationSpecifiers2
+    : declarationSpecifier+
+    ;
+
+declarationSpecifier
+    : storageClassSpecifier
+    | typeSpecifier
+    | typeQualifier
+    | functionSpecifier
+    | alignmentSpecifier
+    ;
+
+initDeclaratorList
+    : initDeclarator ( Comma initDeclarator )*
+    ;
+
+initDeclarator
+    : declarator ( Assign initializer )?
+    ;
+
+storageClassSpecifier
+    : Typedef
+    | Extern
+    | Static
+    | ThreadLocal
+    | Auto
+    | Register
+    ;
+
+typeSpecifier
+    : Void
+    | Char
+    | Short
+    | Int
+    | Long
+    | Float
+    | Double
+    | Signed
+    | Unsigned
+    | Bool
+    | Complex
+    | atomicTypeSpecifier
+    | structOrUnionSpecifier
+    | enumSpecifier
+    | typedefName
+    | typeofSpecifier
+    ;
+
+// typeof() — GCC extension common in OpenSSL
+typeofSpecifier
+    : '__typeof__' LeftParen (expression | typeName) RightParen
+    | '__typeof' LeftParen (expression | typeName) RightParen
+    ;
+
+structOrUnionSpecifier
+    : structOrUnion Identifier? LeftBrace structDeclarationList RightBrace
+    | structOrUnion Identifier
+    ;
+
+structOrUnion
+    : 'struct'
+    | 'union'
+    ;
+
+structDeclarationList
+    : structDeclaration+
+    ;
+
+structDeclaration
+    : specifierQualifierList structDeclaratorList? Semi
+    | staticAssertDeclaration
+    ;
+
+specifierQualifierList
+    : (typeSpecifier | typeQualifier | alignmentSpecifier)+
+    ;
+
+structDeclaratorList
+    : structDeclarator (Comma structDeclarator)*
+    ;
+
+structDeclarator
+    : declarator
+    | declarator? Colon constantExpression
+    ;
+
+enumSpecifier
+    : 'enum' Identifier? LeftBrace enumeratorList Comma? RightBrace
+    | 'enum' Identifier
+    ;
+
+enumeratorList
+    : enumerator (Comma enumerator)*
+    ;
+
+enumerator
+    : enumerationConstant (Assign constantExpression)?
+    ;
+
+enumerationConstant
+    : Identifier
+    ;
+
+atomicTypeSpecifier
+    : Atomic LeftParen typeName RightParen
+    ;
+
+typeQualifier
+    : Const
+    | Restrict
+    | Volatile
+    | Atomic
+    ;
+
+functionSpecifier
+    : Inline
+    | Noreturn
+    ;
+
+alignmentSpecifier
+    : Alignas LeftParen (typeName | constantExpression) RightParen
+    ;
+
+declarator
+    : pointer? directDeclarator
+    ;
+
+directDeclarator
+    : Identifier
+    | LeftParen declarator RightParen
+    | directDeclarator LeftBracket typeQualifierList? assignmentExpression? RightBracket
+    | directDeclarator LeftBracket Static typeQualifierList? assignmentExpression RightBracket
+    | directDeclarator LeftBracket typeQualifierList Static assignmentExpression RightBracket
+    | directDeclarator LeftBracket typeQualifierList? Star RightBracket
+    | directDeclarator LeftParen parameterTypeList RightParen
+    | directDeclarator LeftParen identifierList? RightParen
+    ;
+
+pointer
+    : ( Star typeQualifierList? )+
+    ;
+
+typeQualifierList
+    : typeQualifier+
+    ;
+
+parameterTypeList
+    : parameterList ( Comma Ellipsis )?
+    ;
+
+parameterList
+    : parameterDeclaration ( Comma parameterDeclaration )*
+    ;
+
+parameterDeclaration
+    : declarationSpecifiers declarator
+    | declarationSpecifiers2 abstractDeclarator?
+    ;
+
+identifierList
+    : Identifier ( Comma Identifier )*
+    ;
+
+typeName
+    : specifierQualifierList abstractDeclarator?
+    ;
+
+abstractDeclarator
+    : pointer
+    | pointer? directAbstractDeclarator
+    ;
+
+directAbstractDeclarator
+    : LeftParen abstractDeclarator RightParen
+    | LeftBracket typeQualifierList? assignmentExpression? RightBracket
+    | LeftBracket Static typeQualifierList? assignmentExpression RightBracket
+    | LeftBracket typeQualifierList Static assignmentExpression RightBracket
+    | LeftBracket Star RightBracket
+    | LeftParen parameterTypeList? RightParen
+    | directAbstractDeclarator LeftBracket typeQualifierList? assignmentExpression? RightBracket
+    | directAbstractDeclarator LeftBracket Static typeQualifierList? assignmentExpression RightBracket
+    | directAbstractDeclarator LeftBracket typeQualifierList Static assignmentExpression RightBracket
+    | directAbstractDeclarator LeftBracket Star RightBracket
+    | directAbstractDeclarator LeftParen parameterTypeList? RightParen
+    ;
+
+typedefName
+    : Identifier
+    ;
+
+initializer
+    : assignmentExpression
+    | LeftBrace initializerList Comma? RightBrace
+    ;
+
+initializerList
+    : designation? initializer ( Comma designation? initializer )*
+    ;
+
+designation
+    : designatorList Assign
+    ;
+
+designatorList
+    : designator+
+    ;
+
+designator
+    : LeftBracket constantExpression RightBracket
+    | Dot Identifier
+    ;
+
+staticAssertDeclaration
+    : StaticAssert LeftParen constantExpression Comma StringLiteral+ RightParen Semi
+    ;
+
+// ---------------------------------------------------------------------------
+// Statements
+// ---------------------------------------------------------------------------
+
+statement
+    : labeledStatement
+    | compoundStatement
+    | expressionStatement
+    | selectionStatement
+    | iterationStatement
+    | jumpStatement
+    ;
+
+labeledStatement
+    : Identifier Colon statement
+    | Case constantExpression Colon statement
+    | Default Colon statement
+    ;
+
+compoundStatement
+    : LeftBrace blockItemList? RightBrace
+    ;
+
+blockItemList
+    : blockItem+
+    ;
+
+blockItem
+    : statement
+    | declaration
+    ;
+
+expressionStatement
+    : expression? Semi
+    ;
+
+selectionStatement
+    : If LeftParen expression RightParen statement ( Else statement )?
+    | Switch LeftParen expression RightParen statement
+    ;
+
+iterationStatement
+    : While LeftParen expression RightParen statement
+    | Do statement While LeftParen expression RightParen Semi
+    | For LeftParen forCondition RightParen statement
+    ;
+
+forCondition
+    : ( forDeclaration | expression? ) Semi forExpression? Semi forExpression?
+    ;
+
+forDeclaration
+    : declarationSpecifiers initDeclaratorList?
+    ;
+
+forExpression
+    : assignmentExpression ( Comma assignmentExpression )*
+    ;
+
+jumpStatement
+    : Goto Identifier Semi
+    | Continue Semi
+    | Break Semi
+    | Return expression? Semi
+    ;
+
+// ---------------------------------------------------------------------------
+// Expressions
+// ---------------------------------------------------------------------------
+
+compilationUnit2
+    : expression EOF
+    ;
+
+expression
+    : assignmentExpression ( Comma assignmentExpression )*
+    ;
+
+assignmentExpression
+    : conditionalExpression
+    | unaryExpression assignmentOperator assignmentExpression
+    ;
+
+assignmentOperator
+    : Assign | StarAssign | DivAssign | ModAssign | PlusAssign | MinusAssign
+    | LeftShiftAssign | RightShiftAssign | AndAssign | XorAssign | OrAssign
+    ;
+
+conditionalExpression
+    : logicalOrExpression ( Question expression Colon conditionalExpression )?
+    ;
+
+logicalOrExpression
+    : logicalAndExpression ( OrOr logicalAndExpression )*
+    ;
+
+logicalAndExpression
+    : inclusiveOrExpression ( AndAnd inclusiveOrExpression )*
+    ;
+
+inclusiveOrExpression
+    : exclusiveOrExpression ( Or exclusiveOrExpression )*
+    ;
+
+exclusiveOrExpression
+    : andExpression ( Caret andExpression )*
+    ;
+
+andExpression
+    : equalityExpression ( And equalityExpression )*
+    ;
+
+equalityExpression
+    : relationalExpression ( ( Equal | NotEqual ) relationalExpression )*
+    ;
+
+relationalExpression
+    : shiftExpression ( ( Less | Greater | LessEqual | GreaterEqual ) shiftExpression )*
+    ;
+
+shiftExpression
+    : additiveExpression ( ( LeftShift | RightShift ) additiveExpression )*
+    ;
+
+additiveExpression
+    : multiplicativeExpression ( ( Plus | Minus ) multiplicativeExpression )*
+    ;
+
+multiplicativeExpression
+    : castExpression ( ( Star | Div | Mod ) castExpression )*
+    ;
+
+castExpression
+    : LeftParen typeName RightParen castExpression
+    | unaryExpression
+    ;
+
+unaryExpression
+    : postfixExpression
+    | PlusPlus unaryExpression
+    | MinusMinus unaryExpression
+    | unaryOperator castExpression
+    | Sizeof ( unaryExpression | LeftParen typeName RightParen )
+    | Alignof LeftParen typeName RightParen
+    | BuiltinVaArg LeftParen unaryExpression Comma typeName RightParen
+    | BuiltinOffsetof LeftParen typeName Comma unaryExpression RightParen
+    ;
+
+unaryOperator
+    : And | Star | Plus | Minus | Tilde | Not
+    ;
+
+// ---------------------------------------------------------------------------
+// Postfix expressions — KEY RULE for detecting function calls
+// ---------------------------------------------------------------------------
+
+postfixExpression
+    : primaryExpression
+    | postfixExpression LeftBracket expression RightBracket // array index
+    | postfixExpression LeftParen argumentExpressionList? RightParen // FUNCTION CALL ← detect this
+    | postfixExpression ( Dot | Arrow ) Identifier // member access
+    | postfixExpression ( PlusPlus | MinusMinus )
+    | LeftParen typeName RightParen LeftBrace initializerList Comma? RightBrace
+    ;
+
+argumentExpressionList
+    : assignmentExpression ( Comma assignmentExpression )*
+    ;
+
+primaryExpression
+    : Identifier
+    | constant
+    | StringLiteral+
+    | LeftParen expression RightParen
+    | genericSelection
+    ;
+
+constant // lower-case on purpose: an upper-case name would declare a lexer rule, which a parser grammar may not contain
+    : IntegerConstant
+    | FloatingConstant
+    | CharacterConstant
+    ;
+
+genericSelection
+    : Generic LeftParen assignmentExpression Comma genericAssocList RightParen
+    ;
+
+genericAssocList
+    : genericAssociation ( Comma genericAssociation )*
+    ;
+
+genericAssociation
+    : ( typeName | Default ) Colon assignmentExpression
+    ;
+
+constantExpression
+    : conditionalExpression
+    ;
diff --git a/engine/src/main/java/com/ibm/engine/language/cpp/CppCheck.java b/engine/src/main/java/com/ibm/engine/language/cpp/CppCheck.java
new file mode 100644
index 000000000..23ab965ee
--- /dev/null
+++ b/engine/src/main/java/com/ibm/engine/language/cpp/CppCheck.java
@@ -0,0 +1,36 @@
+/*
+ * Sonar Cryptography Plugin
+ * Copyright (C) 2025 PQCA
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ibm.engine.language.cpp;
+
+/**
+ * Marker interface for C/C++ detection rules.
+ *
+ *

<p>This interface fills the {@code R} (Rule) generic type parameter used throughout the engine.
+ * It is the C/C++ equivalent of {@code JavaCheck} in the Java language support and {@code
+ * CSharpCheck} in the C# language support.
+ *
+ * <p>All C/C++ detection rule classes should implement this interface so that the engine's generic
+ * machinery can operate in a type-safe way across language modules.
+ */
+public interface CppCheck {
+    // Marker interface — intentionally declares no methods.
+    // The engine uses it as the rule type argument of its generic interfaces
+    // (IDetectionRule, ILanguageSupport, etc.).
+}
diff --git a/engine/src/main/java/com/ibm/engine/language/cpp/CppSymbol.java b/engine/src/main/java/com/ibm/engine/language/cpp/CppSymbol.java
new file mode 100644
index 000000000..e06489eb7
--- /dev/null
+++ b/engine/src/main/java/com/ibm/engine/language/cpp/CppSymbol.java
@@ -0,0 +1,109 @@
+/*
+ * Sonar Cryptography Plugin
+ * Copyright (C) 2025 PQCA
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ibm.engine.language.cpp;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+/**
+ * Represents a symbol (variable, function, or type name) in C/C++ source code.
+ *
+ *

<p>This class fills the {@code S} (Symbol) generic type parameter used throughout the engine. It
+ * is the C/C++ equivalent of {@code Symbol} in the Java language support and {@code CSharpSymbol}
+ * in the C# language support.
+ *
+ * <p>In C/C++, because we use ANTLR for parsing (not a full Sonar language analyzer), we do not
+ * have a rich semantic model with full type resolution. This class therefore provides a lightweight
+ * symbol representation based on the identifier name and its resolved string value (if any).
+ *
+ * <p>Known limitation: unlike Java's {@code Symbol}, this class does not support cross-method
+ * variable tracking. Symbol resolution is limited to single-function scope, consistent with the
+ * current C# implementation approach.
+ */
+public final class CppSymbol {
+
+    /** The name of the identifier as it appears in source code (e.g., {@code "ctx"}, {@code "key"}). */
+    @Nonnull private final String name;
+
+    /**
+     * The resolved string value of this symbol, if it can be statically determined. For example,
+     * if the code contains {@code const char *algo = "AES-256-CBC"}, then {@code resolvedValue}
+     * would be {@code "AES-256-CBC"}. {@code null} if the value cannot be statically resolved.
+     */
+    @Nullable private final String resolvedValue;
+
+    /**
+     * Creates a new {@code CppSymbol} with a name and no resolved value.
+     *
+     * @param name the identifier name as it appears in source code
+     */
+    public CppSymbol(@Nonnull String name) {
+        // Delegate so that both constructors share a single initialization path.
+        this(name, null);
+    }
+
+    /**
+     * Creates a new {@code CppSymbol} with both a name and a statically resolved value.
+     *
+     * @param name the identifier name as it appears in source code
+     * @param resolvedValue the statically resolved string value, or {@code null} if not resolvable
+     */
+    public CppSymbol(@Nonnull String name, @Nullable String resolvedValue) {
+        this.name = name;
+        this.resolvedValue = resolvedValue;
+    }
+
+    /**
+     * Returns the identifier name as it appears in source code.
+     *
+     * @return the symbol name, never {@code null}
+     */
+    @Nonnull
+    public String name() {
+        return name;
+    }
+
+    /**
+     * Returns the statically resolved string value of this symbol, if available.
+     *
+     * @return the resolved value, or {@code null} if it cannot be statically determined
+     */
+    @Nullable
+    public String resolvedValue() {
+        return resolvedValue;
+    }
+
+    /**
+     * Returns {@code true} if this symbol has a statically resolved value.
+     *
+     * @return {@code true} if {@link #resolvedValue()} is non-null
+     */
+    public boolean isResolved() {
+        return resolvedValue != null;
+    }
+
+    @Override
+    public String toString() {
+        if (!isResolved()) {
+            return "CppSymbol{name='" + name + "'}";
+        }
+        return "CppSymbol{name='" + name + "', resolvedValue='" + resolvedValue + "'}";
+    }
+}
diff --git a/engine/src/main/java/com/ibm/engine/language/cpp/CppBaseMethodVisitor.java b/engine/src/main/java/com/ibm/engine/language/cpp/CppBaseMethodVisitor.java
new file mode 100644
index 000000000..c12c8b419
--- /dev/null
+++ b/engine/src/main/java/com/ibm/engine/language/cpp/CppBaseMethodVisitor.java
@@ -0,0 +1,67 @@
+/*
+ * Sonar Cryptography Plugin
+ * Copyright (C) 2025 PQCA
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ibm.engine.language.cpp;
+
+import com.ibm.engine.detection.IBaseMethodVisitor;
+import com.ibm.engine.detection.IDetectionEngine;
+import com.ibm.engine.detection.TraceSymbol;
+import com.ibm.engine.language.cpp.tree.CppBlockTree;
+import com.ibm.engine.language.cpp.tree.CppTree;
+import javax.annotation.Nonnull;
+
+/**
+ * Base method visitor for C/C++ that invokes the detection engine on each function body.
+ *
+ *

<p>Mirrors {@link com.ibm.engine.language.csharp.CSharpBaseMethodVisitor}: when the sensor
+ * dispatches a function body (a {@link CppBlockTree}) via {@link #visitMethodDefinition}, the
+ * detection engine is run on it.
+ *
+ * <p>In C/C++ every top-level function body is a {@link CppBlockTree}. The ANTLR tree converter
+ * ({@code CppTreeConverter}) extracts one {@link CppBlockTree} per function body, and the sensor
+ * calls {@link #visitMethodDefinition} once per block. The detection engine then walks the block's
+ * statement list looking for function calls that match registered detection rules.
+ */
+public final class CppBaseMethodVisitor implements IBaseMethodVisitor<CppTree> {
+
+    @Nonnull private final TraceSymbol<CppSymbol> traceSymbol; // handed to every engine run
+    @Nonnull private final IDetectionEngine<CppTree, CppSymbol> detectionEngine;
+
+    public CppBaseMethodVisitor(
+            @Nonnull TraceSymbol<CppSymbol> traceSymbol,
+            @Nonnull IDetectionEngine<CppTree, CppSymbol> detectionEngine) {
+        this.traceSymbol = traceSymbol;
+        this.detectionEngine = detectionEngine;
+    }
+
+    /**
+     * Visits a C/C++ function body and runs the detection engine on it.
+     *
+     * <p>Only {@link CppBlockTree} nodes are processed — any other tree type is ignored because
+     * C/C++ detection rules operate at the block (function body) level.
+     *
+     * @param method a tree node representing a function body, expected to be a {@link CppBlockTree}
+     */
+    @Override
+    public void visitMethodDefinition(@Nonnull CppTree method) {
+        if (method instanceof CppBlockTree blockTree) {
+            detectionEngine.run(traceSymbol, blockTree);
+        }
+    }
+}
diff --git a/engine/src/main/java/com/ibm/engine/language/cpp/CppLanguageTranslation.java b/engine/src/main/java/com/ibm/engine/language/cpp/CppLanguageTranslation.java
new file mode 100644
index 000000000..0685640df
--- /dev/null
+++ b/engine/src/main/java/com/ibm/engine/language/cpp/CppLanguageTranslation.java
@@ -0,0 +1,149 @@
+/*
+ * Sonar Cryptography Plugin
+ * Copyright (C) 2025 PQCA
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package com.ibm.engine.language.cpp; + +import com.ibm.engine.detection.IType; +import com.ibm.engine.detection.MatchContext; +import com.ibm.engine.language.ILanguageTranslation; +import com.ibm.engine.language.cpp.tree.CppIdentifierTree; +import com.ibm.engine.language.cpp.tree.CppLiteralTree; +import com.ibm.engine.language.cpp.tree.CppMemberAccessTree; +import com.ibm.engine.language.cpp.tree.CppMethodInvocationTree; +import com.ibm.engine.language.cpp.tree.CppTree; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import javax.annotation.Nonnull; + +/** + * Language translation implementation for C/C++. + * + *

<p>Mirrors {@link com.ibm.engine.language.csharp.CSharpLanguageTranslation}: extracts method
+ * names, object type strings, and parameter information from the {@link CppTree} hierarchy produced
+ * by the ANTLR4-based tree converter.
+ *
+ *

<p>Since ANTLR4 provides only syntactic information (no full C/C++ type inference), all parameter
+ * types are treated as matching any expected type. Object type matching is string-based: the engine
+ * matches the inferred type name (e.g. {@code "EVP_CIPHER_CTX"}) against the type strings declared
+ * in detection rules.
+ *
+ *

For C/C++ there are no constructors in the Java/C# sense — all initialization goes through + * function calls like {@code EVP_CIPHER_CTX_new()} — so {@code forConstructor()} is not used. + * The {@code ""} sentinel is therefore never returned here. + */ +public final class CppLanguageTranslation implements ILanguageTranslation { + + @Nonnull + @Override + public Optional getMethodName( + @Nonnull MatchContext matchContext, @Nonnull CppTree methodInvocation) { + if (methodInvocation instanceof CppMethodInvocationTree invocation) { + return Optional.of(invocation.methodName()); + } + return Optional.empty(); + } + + @Nonnull + @Override + public Optional getInvokedObjectTypeString( + @Nonnull MatchContext matchContext, @Nonnull CppTree methodInvocation) { + if (methodInvocation instanceof CppMethodInvocationTree invocation) { + String typeName = invocation.objectType(); + if (typeName == null) { + // No inferred object type — match any type to avoid blocking detection + return Optional.of(expectedType -> true); + } + return Optional.of(expectedType -> expectedType.equals(typeName)); + } + return Optional.empty(); + } + + @Nonnull + @Override + public Optional getMethodReturnTypeString( + @Nonnull MatchContext matchContext, @Nonnull CppTree methodInvocation) { + // ANTLR4 provides no type inference; return type unavailable + return Optional.empty(); + } + + @Nonnull + @Override + public List getMethodParameterTypes( + @Nonnull MatchContext matchContext, @Nonnull CppTree methodInvocation) { + if (!(methodInvocation instanceof CppMethodInvocationTree invocation)) { + return Collections.emptyList(); + } + List args = invocation.arguments(); + if (args.isEmpty()) { + return Collections.emptyList(); + } + // No semantic type info available from ANTLR4 — every argument matches any expected type + List types = new ArrayList<>(args.size()); + for (int i = 0; i < args.size(); i++) { + types.add(expectedType -> true); + } + return types; + } + + @Nonnull + @Override + public 
Optional resolveIdentifierAsString( + @Nonnull MatchContext matchContext, @Nonnull CppTree identifierTree) { + if (identifierTree instanceof CppLiteralTree literal) { + // Return the unquoted value so string literals like "AES-256-CBC" + // are resolved as AES-256-CBC (without surrounding quotes) + return Optional.of(literal.unquotedValue()); + } else if (identifierTree instanceof CppIdentifierTree identifier) { + return Optional.of(identifier.name()); + } + return Optional.empty(); + } + + @Nonnull + @Override + public Optional getEnumIdentifierName( + @Nonnull MatchContext matchContext, @Nonnull CppTree enumIdentifier) { + // C has no enum member-access syntax like C# (CipherMode.CBC). + // Enum constants appear as plain identifiers (EVP_CIPH_CBC_MODE) + // or as member accesses on a struct (params->mode). + if (enumIdentifier instanceof CppMemberAccessTree memberAccess) { + return Optional.of(memberAccess.memberName()); + } else if (enumIdentifier instanceof CppIdentifierTree identifier) { + return Optional.of(identifier.name()); + } + return Optional.empty(); + } + + @Nonnull + @Override + public Optional getEnumClassName( + @Nonnull MatchContext matchContext, @Nonnull CppTree enumClass) { + // C does not have qualified enum syntax like C#. + // Member access object side is returned as a best-effort "class" name. 
+ if (enumClass instanceof CppMemberAccessTree memberAccess) { + CppTree object = memberAccess.object(); + if (object instanceof CppIdentifierTree identifier) { + return Optional.of(identifier.name()); + } + } + return Optional.empty(); + } +} diff --git a/engine/src/main/java/com/ibm/engine/language/cpp/Cppscancontext.java b/engine/src/main/java/com/ibm/engine/language/cpp/Cppscancontext.java new file mode 100644 index 000000000..c3018e7da --- /dev/null +++ b/engine/src/main/java/com/ibm/engine/language/cpp/Cppscancontext.java @@ -0,0 +1,85 @@ +/* + * Sonar Cryptography Plugin + * Copyright (C) 2025 PQCA + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ibm.engine.language.cpp; + +import com.ibm.engine.language.IScanContext; +import com.ibm.engine.language.cpp.tree.CppTree; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import org.sonar.api.batch.fs.InputFile; +import org.sonar.api.batch.sensor.SensorContext; +import org.sonar.api.batch.sensor.issue.NewIssue; +import org.sonar.api.batch.sensor.issue.NewIssueLocation; +import org.sonar.api.rule.RuleKey; +import org.sonar.check.Rule; + +/** + * C/C++ scan context wrapping the SonarQube {@link SensorContext}. + * + *

<p>Mirrors {@link com.ibm.engine.language.csharp.CSharpScanContext}: since there is no
+ * sonar-cxx framework integration, we hold the raw {@link SensorContext} and {@link InputFile}
+ * and report issues directly via the SonarQube sensor API.
+ *
+ *

This class fills the {@code P} (Publisher) generic type parameter used throughout the engine. + * + * @param sensorContext the SonarQube sensor context for the current analysis run + * @param inputFile the C/C++ source file currently being analysed + * @param repositoryKey the rule repository key (e.g. {@code "sonar-cpp-crypto"}) + */ +public record CppScanContext( + @Nonnull SensorContext sensorContext, + @Nonnull InputFile inputFile, + @Nonnull String repositoryKey) + implements IScanContext { + + @Override + public void reportIssue( + @Nonnull CppCheck currentRule, + @Nonnull CppTree tree, + @Nonnull String message) { + String ruleKey = getRuleKey(currentRule); + if (ruleKey == null) { + return; + } + int line = Math.max(1, tree.line()); + NewIssue issue = sensorContext.newIssue(); + NewIssueLocation location = + issue.newLocation().on(inputFile).at(inputFile.selectLine(line)).message(message); + issue.forRule(RuleKey.of(repositoryKey, ruleKey)).at(location).save(); + } + + @Nullable + private static String getRuleKey(@Nonnull CppCheck rule) { + Rule annotation = rule.getClass().getAnnotation(Rule.class); + return annotation != null ? annotation.key() : null; + } + + @Nonnull + @Override + public InputFile getInputFile() { + return inputFile; + } + + @Nonnull + @Override + public String getFilePath() { + return inputFile.uri().getPath(); + } +} diff --git a/engine/src/main/java/com/ibm/engine/language/cpp/tree/CppIdentifierTree.java b/engine/src/main/java/com/ibm/engine/language/cpp/tree/CppIdentifierTree.java new file mode 100644 index 000000000..a994a330f --- /dev/null +++ b/engine/src/main/java/com/ibm/engine/language/cpp/tree/CppIdentifierTree.java @@ -0,0 +1,136 @@ +/* + * Sonar Cryptography Plugin + * Copyright (C) 2025 PQCA + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ibm.engine.language.cpp.tree; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * A C/C++ tree node representing an identifier — a variable name, function name, or type name as + * it appears in an expression. + * + *

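<p>Examples of identifiers this node captures:
+ *
+ * <ul>
+ *   <li>a variable name, e.g. {@code ctx}
+ *   <li>a function name, e.g. {@code EVP_CIPHER_CTX_new}
+ *   <li>a type name, e.g. {@code EVP_CIPHER_CTX}
+ * </ul>
+ *
+ *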

<p>Example C code producing this node:
+ *
+ * <pre>{@code
+ * EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
+ * //               ^^^                  ^^^
+ * //        identifier (variable)   identifier (function)
+ * }</pre>
+ * + *

The engine uses this node to match function names against detection rules. When the detection + * engine sees a {@link CppMethodInvocationTree}, it checks the callee identifier name against the + * patterns defined in each {@code IDetectionRule}. + */ +public final class CppIdentifierTree implements CppTree { + + /** The identifier name exactly as it appears in source code. */ + @Nonnull private final String name; + + /** + * The optional type string of this identifier, derived from context (e.g. {@code + * "EVP_CIPHER_CTX *"} for a pointer variable). This is used by the engine's {@code + * forObjectTypes} matching. May be {@code null} when the type cannot be inferred from the + * current single-method scope. + */ + @Nullable private final String type; + + private final int line; + private final int column; + + /** + * Creates a new {@code CppIdentifierTree} without type information. + * + * @param name the identifier name as it appears in source code + * @param line 1-based line number in the source file + * @param column 0-based column offset within the line + */ + public CppIdentifierTree(@Nonnull String name, int line, int column) { + this.name = name; + this.type = null; + this.line = line; + this.column = column; + } + + /** + * Creates a new {@code CppIdentifierTree} with optional type information. + * + * @param name the identifier name as it appears in source code + * @param type the inferred type string, or {@code null} if not known + * @param line 1-based line number in the source file + * @param column 0-based column offset within the line + */ + public CppIdentifierTree(@Nonnull String name, @Nullable String type, int line, int column) { + this.name = name; + this.type = type; + this.line = line; + this.column = column; + } + + /** + * Returns the identifier name exactly as it appears in source code. 
+ * + * @return identifier name, never {@code null} + */ + @Nonnull + public String name() { + return name; + } + + /** + * Returns the inferred type of this identifier, if available. + * + *

For OpenSSL code, this would be something like {@code "EVP_CIPHER_CTX"} or {@code "RSA"}. + * This is used by the engine when matching {@code forObjectTypes(...)} in detection rules. + * + * @return the type string, or {@code null} if not determinable from single-method scope + */ + @Nullable + public String type() { + return type; + } + + @Override + public Kind kind() { + return Kind.IDENTIFIER; + } + + @Override + public int line() { + return line; + } + + @Override + public int column() { + return column; + } + + @Override + public String toString() { + return "CppIdentifierTree{name='" + name + "', type='" + type + "', line=" + line + "}"; + } +} diff --git a/engine/src/main/java/com/ibm/engine/language/cpp/tree/CppTree.java b/engine/src/main/java/com/ibm/engine/language/cpp/tree/CppTree.java new file mode 100644 index 000000000..5e3d2c6b3 --- /dev/null +++ b/engine/src/main/java/com/ibm/engine/language/cpp/tree/CppTree.java @@ -0,0 +1,99 @@ +/* + * Sonar Cryptography Plugin + * Copyright (C) 2025 PQCA + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ibm.engine.language.cpp.tree; + +/** + * Base interface for all C/C++ abstract syntax tree (AST) nodes used by the engine. + * + *

<p>This interface fills the {@code T} (Tree) generic type parameter used throughout the engine.
+ * It is the C/C++ equivalent of {@code Tree} in the Java language support (from the Sonar Java
+ * plugin API) and {@code CSharpTree} in the C# language support.
+ *
+ *

<p>Because we use ANTLR to parse C/C++ source files directly (rather than a Sonar language
+ * plugin), we build our own lightweight tree node hierarchy rather than relying on a pre-existing
+ * AST API. Each concrete implementation of this interface represents a specific syntactic construct
+ * in C/C++ source code.
+ *
+ *

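<p>The concrete tree node types are:
+ *
+ * <ul>
+ *   <li>{@code CppLiteralTree} — a string literal or numeric constant
+ *   <li>{@code CppIdentifierTree} — a bare identifier (variable, function, or type name)
+ *   <li>{@code CppMethodInvocationTree} — a function call expression
+ *   <li>{@code CppMemberAccessTree} — a member access using {@code ->} or {@code .}
+ *   <li>{@code CppBlockTree} — a brace-enclosed block such as a function body
+ * </ul>
+ *
+ *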

The {@link Kind} enum allows the engine and detection rules to distinguish between node types + * without needing to use {@code instanceof} checks everywhere. + */ +public interface CppTree { + + /** + * Enumerates the kinds of C/C++ tree nodes recognised by the engine. + * + *

This mirrors the role of the {@code Tree.Kind} enum in the Sonar Java plugin API. + */ + enum Kind { + /** A string literal ({@code "AES-256-CBC"}) or numeric constant ({@code 256}). */ + LITERAL, + + /** A bare identifier — a variable name or function name used as an expression. */ + IDENTIFIER, + + /** + * A function call expression, e.g. {@code EVP_EncryptInit_ex(ctx, EVP_aes_256_cbc(), + * NULL, key, iv)}. + */ + METHOD_INVOCATION, + + /** + * A member access expression using {@code ->} or {@code .}, e.g. {@code ctx->cipher} or + * {@code params.key_len}. + */ + MEMBER_ACCESS, + + /** A compound statement enclosed in braces, i.e. a {@code { }} block. */ + BLOCK, + } + + /** + * Returns the kind of this tree node. + * + * @return the {@link Kind} of this node, never {@code null} + */ + Kind kind(); + + /** + * Returns the 1-based line number in the source file where this node begins. + * + *

Used by the engine to record the detection location (file + line) in the CBOM output. + * + * @return line number ≥ 1 + */ + int line(); + + /** + * Returns the 0-based character offset within its line where this node begins. + * + * @return column offset ≥ 0 + */ + int column(); +} diff --git a/engine/src/main/java/com/ibm/engine/language/cpp/tree/Cppliteraltree.java b/engine/src/main/java/com/ibm/engine/language/cpp/tree/Cppliteraltree.java new file mode 100644 index 000000000..78a3b4ee0 --- /dev/null +++ b/engine/src/main/java/com/ibm/engine/language/cpp/tree/Cppliteraltree.java @@ -0,0 +1,130 @@ +/* + * Sonar Cryptography Plugin + * Copyright (C) 2025 PQCA + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ibm.engine.language.cpp.tree; + +import javax.annotation.Nonnull; + +/** + * A C/C++ tree node representing a literal value — either a string literal or a numeric constant. + * + *

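<p>Examples of what this node captures:
+ *
+ * <ul>
+ *   <li>a string literal such as {@code "AES-256-CBC"} or {@code "SHA256"}
+ *   <li>a numeric constant such as {@code 256} or {@code 2048}
+ * </ul>
+ *
+ *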

<p>This is one of the most important node types for crypto detection, because OpenSSL functions
+ * frequently receive algorithm identifiers as string literals or key sizes as integer constants.
+ *
+ *

<p>Example C code producing this node:
+ *
+ * <pre>{@code
+ * EVP_DigestInit_ex(ctx, EVP_get_digestbyname("SHA256"), NULL);
+ * RSA_generate_key_ex(rsa, 2048, e, NULL);
+ * }</pre>
+ */ +public final class CppLiteralTree implements CppTree { + + /** The raw text of the literal exactly as it appears in source, e.g. {@code "AES-256-CBC"} or {@code 256}. */ + @Nonnull private final String value; + + /** Whether this literal is a string (quoted) rather than a numeric constant. */ + private final boolean isString; + + private final int line; + private final int column; + + /** + * Creates a new {@code CppLiteralTree}. + * + * @param value the raw literal text as it appears in source code (including quotes for strings) + * @param isString {@code true} if this is a string literal, {@code false} for numeric + * @param line 1-based line number in the source file + * @param column 0-based column offset within the line + */ + public CppLiteralTree(@Nonnull String value, boolean isString, int line, int column) { + this.value = value; + this.isString = isString; + this.line = line; + this.column = column; + } + + /** + * Returns the raw text of the literal as it appears in source code. + * + *

For string literals this includes the surrounding quotes, e.g. {@code "AES-256-CBC"}. + * Call {@link #unquotedValue()} to get the content without quotes. + * + * @return the raw literal text, never {@code null} + */ + @Nonnull + public String value() { + return value; + } + + /** + * Returns the string content without surrounding double-quotes. + * + *

<p>For example, if the source code contained {@code "AES-256-CBC"}, this method returns
+ * {@code AES-256-CBC}.
+ *
+ *

For numeric literals, this is the same as {@link #value()}. + * + * @return the unquoted value + */ + @Nonnull + public String unquotedValue() { + if (isString && value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) { + return value.substring(1, value.length() - 1); + } + return value; + } + + /** + * Returns {@code true} if this literal is a string (enclosed in double quotes). + * + * @return {@code true} for string literals, {@code false} for numeric constants + */ + public boolean isString() { + return isString; + } + + @Override + public Kind kind() { + return Kind.LITERAL; + } + + @Override + public int line() { + return line; + } + + @Override + public int column() { + return column; + } + + @Override + public String toString() { + return "CppLiteralTree{value=" + value + ", line=" + line + "}"; + } +} diff --git a/engine/src/main/java/com/ibm/engine/language/cpp/tree/Cppmemberaccesstree.java b/engine/src/main/java/com/ibm/engine/language/cpp/tree/Cppmemberaccesstree.java new file mode 100644 index 000000000..f48be0dcf --- /dev/null +++ b/engine/src/main/java/com/ibm/engine/language/cpp/tree/Cppmemberaccesstree.java @@ -0,0 +1,164 @@ +/* + * Sonar Cryptography Plugin + * Copyright (C) 2025 PQCA + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ibm.engine.language.cpp.tree; + +import javax.annotation.Nonnull; + +/** + * A C/C++ tree node representing a member access expression using {@code ->} or {@code .}. + * + *

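<p>In C, struct fields and pointer-to-struct fields are frequently used when working with OpenSSL
+ * types. This node captures expressions like:
+ *
+ * <ul>
+ *   <li>{@code ctx->cipher} or {@code ctx->encrypt} — pointer member access via {@code ->}
+ *   <li>{@code params.key_len} or {@code method.rsa_sign} — direct member access via {@code .}
+ * </ul>
+ *
+ *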

<p>Example C code producing this node:
+ *
+ * <pre>{@code
+ * // Arrow operator (pointer to struct)
+ * EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
+ * ctx->encrypt = 1;
+ *
+ * // Dot operator (direct struct access)
+ * RSA_METHOD method;
+ * method.rsa_sign = my_sign_function;
+ * }</pre>
+ * + *

The engine uses this node primarily to resolve the object type when matching {@code + * forObjectTypes(...)} in detection rules. By tracking what type {@code ctx} is, the engine can + * confirm that {@code EVP_EncryptInit_ex(ctx, ...)} is indeed operating on an {@code + * EVP_CIPHER_CTX}. + */ +public final class CppMemberAccessTree implements CppTree { + + /** Enum distinguishing between {@code ->} (arrow) and {@code .} (dot) access. */ + public enum AccessType { + /** Pointer member access via {@code ->}. */ + ARROW, + /** Direct struct member access via {@code .}. */ + DOT + } + + /** The expression on the left side of the operator, e.g. {@code ctx} in {@code ctx->cipher}. */ + @Nonnull private final CppTree object; + + /** The member name on the right side of the operator, e.g. {@code "cipher"} in {@code ctx->cipher}. */ + @Nonnull private final String memberName; + + /** Whether this is an arrow ({@code ->}) or dot ({@code .}) access. */ + @Nonnull private final AccessType accessType; + + private final int line; + private final int column; + + /** + * Creates a new {@code CppMemberAccessTree}. + * + * @param object the tree node representing the left-hand side expression + * @param memberName the name of the struct member being accessed + * @param accessType whether this is {@code ->} or {@code .} access + * @param line 1-based line number in the source file + * @param column 0-based column offset within the line + */ + public CppMemberAccessTree( + @Nonnull CppTree object, + @Nonnull String memberName, + @Nonnull AccessType accessType, + int line, + int column) { + this.object = object; + this.memberName = memberName; + this.accessType = accessType; + this.line = line; + this.column = column; + } + + /** + * Returns the tree node representing the object (left-hand side of the access operator). 
+ * + * @return the object expression tree, never {@code null} + */ + @Nonnull + public CppTree object() { + return object; + } + + /** + * Returns the name of the struct member being accessed. + * + * @return member name, never {@code null} + */ + @Nonnull + public String memberName() { + return memberName; + } + + /** + * Returns whether this is a {@code ->} (arrow) or {@code .} (dot) member access. + * + * @return the access type, never {@code null} + */ + @Nonnull + public AccessType accessType() { + return accessType; + } + + /** Returns {@code true} if this is a {@code ->} (pointer) member access. */ + public boolean isArrow() { + return accessType == AccessType.ARROW; + } + + /** Returns {@code true} if this is a {@code .} (direct) member access. */ + public boolean isDot() { + return accessType == AccessType.DOT; + } + + @Override + public Kind kind() { + return Kind.MEMBER_ACCESS; + } + + @Override + public int line() { + return line; + } + + @Override + public int column() { + return column; + } + + @Override + public String toString() { + String op = accessType == AccessType.ARROW ? "->" : "."; + return "CppMemberAccessTree{object=" + + object + + ", op='" + + op + + "', member='" + + memberName + + "', line=" + + line + + "}"; + } +} diff --git a/pom.xml b/pom.xml index a32d3c773..f411054af 100644 --- a/pom.xml +++ b/pom.xml @@ -14,6 +14,7 @@ python go csharp + cpp engine output common diff --git a/sonar-cryptography-plugin/pom.xml b/sonar-cryptography-plugin/pom.xml index 9ac6bfe6a..a181f9eee 100644 --- a/sonar-cryptography-plugin/pom.xml +++ b/sonar-cryptography-plugin/pom.xml @@ -47,6 +47,12 @@ 2.0.0-SNAPSHOT compile + + com.ibm + cpp + 2.0.0-SNAPSHOT + compile + @@ -71,7 +77,7 @@ Sonar Crypto Plugin com.ibm.plugin.CryptographyPlugin - java,jsp,py,ipynb,go,cs + java,jsp,py,ipynb,go,cs,c,cpp,h ${sonar.minVersion} true true @@ -104,7 +110,7 @@ NOTICE* - + @@ -174,4 +180,4 @@ - \ No newline at end of file +