mirror of
https://github.com/azaion/detections-semantic.git
synced 2026-04-22 22:36:38 +00:00
8e2ecf50fd
Made-with: Cursor
790 lines
23 KiB
YAML
790 lines
23 KiB
YAML
# =============================================================================
|
|
# AQE Skill Evaluation Test Suite: Security Testing v1.0.0
|
|
# =============================================================================
|
|
#
|
|
# Comprehensive evaluation suite for the security-testing skill per ADR-056.
|
|
# Tests OWASP Top 10 2021 detection, severity classification, remediation
|
|
# quality, and cross-model consistency.
|
|
#
|
|
# Schema: .claude/skills/.validation/schemas/skill-eval.schema.json
|
|
# Validator: .claude/skills/security-testing/scripts/validate-config.json
|
|
#
|
|
# Coverage:
|
|
# - OWASP A01:2021 - Broken Access Control
|
|
# - OWASP A02:2021 - Cryptographic Failures
|
|
# - OWASP A03:2021 - Injection (SQL, XSS, SSTI; command injection not yet covered)
|
|
# - OWASP A07:2021 - Identification and Authentication Failures
|
|
# - OWASP A08:2021 - Software and Data Integrity Failures (insecure deserialization)
|
|
# - Negative tests (no false positives on secure code)
|
|
#
|
|
# =============================================================================
|
|
|
|
skill: security-testing
|
|
version: 1.0.0
|
|
description: >
|
|
Comprehensive evaluation suite for the security-testing skill.
|
|
Tests OWASP Top 10 2021 detection capabilities, CWE classification accuracy,
|
|
CVSS scoring, severity classification, and remediation quality.
|
|
Supports multi-model testing and integrates with ReasoningBank for
|
|
continuous improvement.
|
|
|
|
# =============================================================================
|
|
# Multi-Model Configuration
|
|
# =============================================================================
|
|
|
|
models_to_test:
|
|
- claude-3.5-sonnet # Primary model (high accuracy expected)
|
|
- claude-3-haiku # Fast model (minimum quality threshold)
|
|
- gpt-4o # Cross-vendor validation
|
|
|
|
# =============================================================================
|
|
# MCP Integration Configuration
|
|
# =============================================================================
|
|
|
|
mcp_integration:
|
|
enabled: true
|
|
namespace: skill-validation
|
|
|
|
# Query existing security patterns before running evals
|
|
query_patterns: true
|
|
|
|
# Track each test outcome for learning feedback loop
|
|
track_outcomes: true
|
|
|
|
# Store successful patterns after evals complete
|
|
store_patterns: true
|
|
|
|
# Share learning with fleet coordinator agents
|
|
share_learning: true
|
|
|
|
# Update quality gate with validation metrics
|
|
update_quality_gate: true
|
|
|
|
# Target agents for learning distribution
|
|
target_agents:
|
|
- qe-learning-coordinator
|
|
- qe-queen-coordinator
|
|
- qe-security-scanner
|
|
- qe-security-auditor
|
|
|
|
# =============================================================================
|
|
# ReasoningBank Learning Configuration
|
|
# =============================================================================
|
|
|
|
learning:
|
|
store_success_patterns: true
|
|
store_failure_patterns: true
|
|
pattern_ttl_days: 90
|
|
min_confidence_to_store: 0.7
|
|
cross_model_comparison: true
|
|
|
|
# =============================================================================
|
|
# Result Format Configuration
|
|
# =============================================================================
|
|
|
|
result_format:
|
|
json_output: true
|
|
markdown_report: true
|
|
include_raw_output: false
|
|
include_timing: true
|
|
include_token_usage: true
|
|
|
|
# =============================================================================
|
|
# Environment Setup
|
|
# =============================================================================
|
|
|
|
setup:
|
|
required_tools:
|
|
- jq # JSON parsing (required)
|
|
- npm # Dependency audit (optional but recommended; NOTE(review): listed under required_tools - confirm whether its absence should fail setup)
|
|
|
|
environment_variables:
|
|
SECURITY_SCAN_DEPTH: "deep"
|
|
OWASP_ENABLED: "true"
|
|
SEVERITY_THRESHOLD: "medium"
|
|
|
|
fixtures:
|
|
- name: vulnerable_express_app
|
|
path: fixtures/vulnerable-express-app.js
|
|
content: |
|
|
const express = require('express');
|
|
const app = express();
|
|
|
|
// SQL Injection vulnerability
|
|
app.get('/user', (req, res) => {
|
|
const query = `SELECT * FROM users WHERE id = ${req.params.id}`;
|
|
db.query(query);
|
|
});
|
|
|
|
// XSS vulnerability
|
|
app.get('/profile', (req, res) => {
|
|
res.send(`<h1>Hello ${req.query.name}</h1>`);
|
|
});
|
|
|
|
// Path Traversal vulnerability
|
|
app.get('/file', (req, res) => {
|
|
const path = './uploads/' + req.query.filename;
|
|
res.sendFile(path);
|
|
});
|
|
|
|
# =============================================================================
|
|
# TEST CASES
|
|
# =============================================================================
|
|
|
|
test_cases:
|
|
# ---------------------------------------------------------------------------
|
|
# CATEGORY: SQL Injection (OWASP A03:2021)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
- id: tc001_sql_injection_string_concat
|
|
description: "Detect SQL injection via string concatenation in Node.js"
|
|
category: injection
|
|
priority: critical
|
|
|
|
input:
|
|
code: |
|
|
const express = require('express');
|
|
const mysql = require('mysql');
|
|
const app = express();
|
|
|
|
app.get('/api/users', (req, res) => {
|
|
const userId = req.params.id;
|
|
const query = `SELECT * FROM users WHERE id = ${userId}`;
|
|
db.query(query, (err, results) => {
|
|
res.json(results);
|
|
});
|
|
});
|
|
context:
|
|
language: javascript
|
|
framework: express
|
|
environment: production
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "SQL injection"
|
|
- "parameterized"
|
|
must_not_contain:
|
|
- "no vulnerabilities"
|
|
- "secure"
|
|
must_match_regex:
|
|
- "CWE-89|CWE-564"
|
|
- "A03:20[21][0-9]"
|
|
severity_classification: critical
|
|
finding_count:
|
|
min: 1
|
|
max: 3
|
|
recommendation_count:
|
|
min: 1
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.8
|
|
reasoning_quality_min: 0.7
|
|
grading_rubric:
|
|
completeness: 0.3
|
|
accuracy: 0.5
|
|
actionability: 0.2
|
|
|
|
timeout_ms: 30000
|
|
|
|
- id: tc002_sql_injection_parameterized_safe
|
|
description: "Verify parameterized queries are NOT flagged as vulnerable"
|
|
category: injection
|
|
priority: high
|
|
|
|
input:
|
|
code: |
|
|
app.get('/api/users', (req, res) => {
|
|
const userId = parseInt(req.params.id, 10);
|
|
db.query('SELECT * FROM users WHERE id = ?', [userId], (err, results) => {
|
|
res.json(results);
|
|
});
|
|
});
|
|
context:
|
|
language: javascript
|
|
framework: express
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "parameterized"
|
|
- "secure"
|
|
must_not_contain:
|
|
- "SQL injection"
|
|
- "critical"
|
|
- "vulnerable"
|
|
severity_classification: info
|
|
finding_count:
|
|
max: 1
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.7
|
|
allow_partial: true
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CATEGORY: Cross-Site Scripting (OWASP A03:2021)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
- id: tc003_xss_reflected_html_output
|
|
description: "Detect reflected XSS in unescaped HTML output"
|
|
category: injection
|
|
priority: critical
|
|
|
|
input:
|
|
code: |
|
|
app.get('/profile', (req, res) => {
|
|
const name = req.query.name;
|
|
res.send(`
|
|
<html>
|
|
<body>
|
|
<h1>Welcome, ${name}!</h1>
|
|
<p>Your profile has been loaded.</p>
|
|
</body>
|
|
</html>
|
|
`);
|
|
});
|
|
context:
|
|
language: javascript
|
|
framework: express
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "XSS"
|
|
- "cross-site scripting"
|
|
- "sanitize"
|
|
- "escape"
|
|
must_match_regex:
|
|
- "CWE-79"
|
|
severity_classification: high
|
|
finding_count:
|
|
min: 1
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.8
|
|
reasoning_quality_min: 0.75
|
|
|
|
- id: tc004_xss_dom_based_innerhtml
|
|
description: "Detect DOM-based XSS via innerHTML assignment"
|
|
category: injection
|
|
priority: high
|
|
|
|
input:
|
|
code: |
|
|
// Client-side JavaScript
|
|
const params = new URLSearchParams(window.location.search);
|
|
const message = params.get('msg');
|
|
document.getElementById('output').innerHTML = message;
|
|
context:
|
|
language: javascript
|
|
framework: vanilla
|
|
environment: production
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "DOM"
|
|
- "XSS"
|
|
- "innerHTML"
|
|
- "textContent"
|
|
must_match_regex:
|
|
- "CWE-79"
|
|
severity_classification: high
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.7
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CATEGORY: Authentication Failures (OWASP A07:2021)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
- id: tc005_hardcoded_credentials
|
|
description: "Detect hardcoded credentials and API keys"
|
|
category: authentication
|
|
priority: critical
|
|
|
|
input:
|
|
code: |
|
|
const ADMIN_PASSWORD = 'admin123';
|
|
const API_KEY = 'sk-1234567890abcdef';
|
|
const DATABASE_URL = 'postgres://admin:password123@localhost/db';
|
|
|
|
app.post('/login', (req, res) => {
|
|
if (req.body.password === ADMIN_PASSWORD) {
|
|
req.session.isAdmin = true;
|
|
res.send('Login successful');
|
|
}
|
|
});
|
|
context:
|
|
language: javascript
|
|
framework: express
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "hardcoded"
|
|
- "credentials"
|
|
- "secret"
|
|
- "environment variable"
|
|
must_match_regex:
|
|
- "CWE-798|CWE-259"
|
|
severity_classification: critical
|
|
finding_count:
|
|
min: 2
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.8
|
|
reasoning_quality_min: 0.8
|
|
|
|
- id: tc006_weak_password_hashing
|
|
description: "Detect weak password hashing algorithms (MD5, SHA1)"
|
|
category: authentication
|
|
priority: high
|
|
|
|
input:
|
|
code: |
|
|
const crypto = require('crypto');
|
|
|
|
function hashPassword(password) {
|
|
return crypto.createHash('md5').update(password).digest('hex');
|
|
}
|
|
|
|
function verifyPassword(password, hash) {
|
|
return hashPassword(password) === hash;
|
|
}
|
|
context:
|
|
language: javascript
|
|
framework: nodejs
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "MD5"
|
|
- "weak"
|
|
- "bcrypt"
|
|
- "argon2"
|
|
must_match_regex:
|
|
- "CWE-327|CWE-328|CWE-916"
|
|
severity_classification: high
|
|
finding_count:
|
|
min: 1
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.8
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CATEGORY: Broken Access Control (OWASP A01:2021)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
- id: tc007_idor_missing_authorization
|
|
description: "Detect IDOR vulnerability with missing authorization check"
|
|
category: authorization
|
|
priority: critical
|
|
|
|
input:
|
|
code: |
|
|
app.get('/api/users/:id/profile', (req, res) => {
|
|
// No authorization check - any user can access any profile
|
|
const userId = req.params.id;
|
|
db.query('SELECT * FROM profiles WHERE user_id = ?', [userId])
|
|
.then(profile => res.json(profile));
|
|
});
|
|
|
|
app.delete('/api/users/:id', (req, res) => {
|
|
// No check if requesting user owns this account
|
|
db.query('DELETE FROM users WHERE id = ?', [req.params.id]);
|
|
res.send('User deleted');
|
|
});
|
|
context:
|
|
language: javascript
|
|
framework: express
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "authorization"
|
|
- "access control"
|
|
- "IDOR"
|
|
- "ownership"
|
|
must_match_regex:
|
|
- "CWE-639|CWE-284|CWE-862"
|
|
- "A01:2021"
|
|
severity_classification: critical
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.7
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CATEGORY: Cryptographic Failures (OWASP A02:2021)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
- id: tc008_weak_encryption_des
|
|
description: "Detect use of weak encryption algorithms (DES, RC4)"
|
|
category: cryptography
|
|
priority: high
|
|
|
|
input:
|
|
code: |
|
|
const crypto = require('crypto');
|
|
|
|
function encryptData(data, key) {
|
|
const cipher = crypto.createCipher('des', key);
|
|
return cipher.update(data, 'utf8', 'hex') + cipher.final('hex');
|
|
}
|
|
|
|
function decryptData(data, key) {
|
|
const decipher = crypto.createDecipher('des', key);
|
|
return decipher.update(data, 'hex', 'utf8') + decipher.final('utf8');
|
|
}
|
|
context:
|
|
language: javascript
|
|
framework: nodejs
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "DES"
|
|
- "weak"
|
|
- "deprecated"
|
|
- "AES"
|
|
must_match_regex:
|
|
- "CWE-327|CWE-328"
|
|
- "A02:2021"
|
|
severity_classification: high
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.7
|
|
|
|
- id: tc009_plaintext_password_storage
|
|
description: "Detect plaintext password storage"
|
|
category: cryptography
|
|
priority: critical
|
|
|
|
input:
|
|
code: |
|
|
class User {
|
|
constructor(email, password) {
|
|
this.email = email;
|
|
this.password = password; // Stored in plaintext!
|
|
}
|
|
|
|
save() {
|
|
db.query('INSERT INTO users (email, password) VALUES (?, ?)',
|
|
[this.email, this.password]);
|
|
}
|
|
}
|
|
context:
|
|
language: javascript
|
|
framework: nodejs
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "plaintext"
|
|
- "password"
|
|
- "hash"
|
|
- "bcrypt"
|
|
must_match_regex:
|
|
- "CWE-256|CWE-312"
|
|
- "A02:2021"
|
|
severity_classification: critical
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.8
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CATEGORY: Path Traversal (Related to A01:2021)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
- id: tc010_path_traversal_file_access
|
|
description: "Detect path traversal vulnerability in file access"
|
|
category: injection
|
|
priority: critical
|
|
|
|
input:
|
|
code: |
|
|
const fs = require('fs');
|
|
|
|
app.get('/download', (req, res) => {
|
|
const filename = req.query.file;
|
|
const filepath = './uploads/' + filename;
|
|
res.sendFile(filepath);
|
|
});
|
|
|
|
app.get('/read', (req, res) => {
|
|
const content = fs.readFileSync('./data/' + req.params.name);
|
|
res.send(content);
|
|
});
|
|
context:
|
|
language: javascript
|
|
framework: express
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "path traversal"
|
|
- "directory traversal"
|
|
- "../"
|
|
- "sanitize"
|
|
must_match_regex:
|
|
- "CWE-22|CWE-23"
|
|
severity_classification: critical
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.7
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CATEGORY: Negative Tests (No False Positives)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
- id: tc011_secure_code_no_false_positives
|
|
description: "Verify secure code is NOT flagged as vulnerable"
|
|
category: negative
|
|
priority: critical
|
|
|
|
input:
|
|
code: |
|
|
const express = require('express');
|
|
const helmet = require('helmet');
|
|
const rateLimit = require('express-rate-limit');
|
|
const bcrypt = require('bcrypt');
|
|
const validator = require('validator');
|
|
|
|
const app = express();
|
|
app.use(helmet());
|
|
app.use(rateLimit({ windowMs: 15 * 60 * 1000, max: 100 }));
|
|
|
|
app.post('/api/users', async (req, res) => {
|
|
const { email, password } = req.body;
|
|
|
|
// Input validation
|
|
if (!validator.isEmail(email)) {
|
|
return res.status(400).json({ error: 'Invalid email' });
|
|
}
|
|
|
|
// Secure password hashing
|
|
const hashedPassword = await bcrypt.hash(password, 12);
|
|
|
|
// Parameterized query
|
|
await db.query(
|
|
'INSERT INTO users (email, password) VALUES ($1, $2)',
|
|
[email, hashedPassword]
|
|
);
|
|
|
|
res.status(201).json({ message: 'User created' });
|
|
});
|
|
context:
|
|
language: javascript
|
|
framework: express
|
|
environment: production
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "secure"
|
|
- "best practice"
|
|
must_not_contain:
|
|
- "SQL injection"
|
|
- "XSS"
|
|
- "critical vulnerability"
|
|
- "high severity"
|
|
finding_count:
|
|
max: 2 # Allow informational findings only
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.6
|
|
allow_partial: true
|
|
|
|
- id: tc012_secure_auth_implementation
|
|
description: "Verify secure authentication is recognized as safe"
|
|
category: negative
|
|
priority: high
|
|
|
|
input:
|
|
code: |
|
|
const bcrypt = require('bcrypt');
|
|
const jwt = require('jsonwebtoken');
|
|
|
|
async function login(email, password) {
|
|
const user = await User.findByEmail(email);
|
|
if (!user) {
|
|
return { error: 'Invalid credentials' };
|
|
}
|
|
|
|
const match = await bcrypt.compare(password, user.passwordHash);
|
|
if (!match) {
|
|
return { error: 'Invalid credentials' };
|
|
}
|
|
|
|
const token = jwt.sign(
|
|
{ userId: user.id },
|
|
process.env.JWT_SECRET,
|
|
{ expiresIn: '1h' }
|
|
);
|
|
|
|
return { token };
|
|
}
|
|
context:
|
|
language: javascript
|
|
framework: nodejs
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "bcrypt"
|
|
- "jwt"
|
|
- "secure"
|
|
must_not_contain:
|
|
- "vulnerable"
|
|
- "critical"
|
|
- "hardcoded"
|
|
severity_classification: info
|
|
|
|
validation:
|
|
schema_check: true
|
|
allow_partial: true
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CATEGORY: Python Security (Multi-language Support)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
- id: tc013_python_sql_injection
|
|
description: "Detect SQL injection in Python Flask application"
|
|
category: injection
|
|
priority: critical
|
|
|
|
input:
|
|
code: |
|
|
from flask import Flask, request
|
|
import sqlite3
|
|
|
|
app = Flask(__name__)
|
|
|
|
@app.route('/user')
|
|
def get_user():
|
|
user_id = request.args.get('id')
|
|
conn = sqlite3.connect('users.db')
|
|
cursor = conn.cursor()
|
|
cursor.execute(f"SELECT * FROM users WHERE id = {user_id}")
|
|
return str(cursor.fetchone())
|
|
context:
|
|
language: python
|
|
framework: flask
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "SQL injection"
|
|
- "parameterized"
|
|
- "f-string"
|
|
must_match_regex:
|
|
- "CWE-89"
|
|
severity_classification: critical
|
|
finding_count:
|
|
min: 1
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.7
|
|
|
|
- id: tc014_python_ssti_jinja
|
|
description: "Detect Server-Side Template Injection in Jinja2"
|
|
category: injection
|
|
priority: critical
|
|
|
|
input:
|
|
code: |
|
|
from flask import Flask, request, render_template_string
|
|
|
|
app = Flask(__name__)
|
|
|
|
@app.route('/render')
|
|
def render():
|
|
template = request.args.get('template')
|
|
return render_template_string(template)
|
|
context:
|
|
language: python
|
|
framework: flask
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "SSTI"
|
|
- "template injection"
|
|
- "render_template_string"
|
|
- "Jinja2"
|
|
must_match_regex:
|
|
- "CWE-94|CWE-1336"
|
|
severity_classification: critical
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.7
|
|
|
|
- id: tc015_python_pickle_deserialization
|
|
description: "Detect insecure deserialization with pickle"
|
|
category: injection
|
|
priority: critical
|
|
|
|
input:
|
|
code: |
|
|
import pickle
|
|
from flask import Flask, request
|
|
|
|
app = Flask(__name__)
|
|
|
|
@app.route('/load')
|
|
def load_data():
|
|
data = request.get_data()
|
|
obj = pickle.loads(data)
|
|
return str(obj)
|
|
context:
|
|
language: python
|
|
framework: flask
|
|
|
|
expected_output:
|
|
must_contain:
|
|
- "pickle"
|
|
- "deserialization"
|
|
- "untrusted"
|
|
- "RCE"
|
|
must_match_regex:
|
|
- "CWE-502"
|
|
- "A08:2021"
|
|
severity_classification: critical
|
|
|
|
validation:
|
|
schema_check: true
|
|
keyword_match_threshold: 0.7
|
|
|
|
# =============================================================================
|
|
# SUCCESS CRITERIA
|
|
# =============================================================================
|
|
|
|
success_criteria:
|
|
# Overall pass rate (90% of tests must pass)
|
|
pass_rate: 0.9
|
|
|
|
# Critical tests must ALL pass (100%)
|
|
critical_pass_rate: 1.0
|
|
|
|
# Average reasoning quality score
|
|
avg_reasoning_quality: 0.75
|
|
|
|
# Maximum suite execution time (5 minutes)
|
|
max_execution_time_ms: 300000
|
|
|
|
# Maximum variance between model results (15%)
|
|
cross_model_variance: 0.15
|
|
|
|
# =============================================================================
|
|
# METADATA
|
|
# =============================================================================
|
|
|
|
metadata:
|
|
author: "qe-security-auditor"
|
|
created: "2026-02-02"
|
|
last_updated: "2026-02-02"
|
|
coverage_target: >
|
|
OWASP Top 10 2021: A01 (Broken Access Control), A02 (Cryptographic Failures),
|
|
A03 (Injection - SQL, XSS, SSTI, Command), A07 (Authentication Failures),
|
|
A08 (Software Integrity - Deserialization). Covers JavaScript/Node.js
|
|
Express apps and Python Flask apps. 15 test cases with 90% pass rate
|
|
requirement and 100% critical pass rate.
|