mirror of
https://github.com/azaion/detections.git
synced 2026-04-22 22:46:31 +00:00
Update .gitignore to include additional file types and directories for Python projects, enhancing environment management and build artifacts exclusion.
This commit is contained in:
@@ -0,0 +1,789 @@
|
||||
# =============================================================================
|
||||
# AQE Skill Evaluation Test Suite: Security Testing v1.0.0
|
||||
# =============================================================================
|
||||
#
|
||||
# Comprehensive evaluation suite for the security-testing skill per ADR-056.
|
||||
# Tests OWASP Top 10 2021 detection, severity classification, remediation
|
||||
# quality, and cross-model consistency.
|
||||
#
|
||||
# Schema: .claude/skills/.validation/schemas/skill-eval.schema.json
|
||||
# Validator: .claude/skills/security-testing/scripts/validate-config.json
|
||||
#
|
||||
# Coverage:
|
||||
# - OWASP A01:2021 - Broken Access Control
|
||||
# - OWASP A02:2021 - Cryptographic Failures
|
||||
# - OWASP A03:2021 - Injection (SQL, XSS, SSTI)
|
||||
# - OWASP A07:2021 - Identification and Authentication Failures
# - OWASP A08:2021 - Software and Data Integrity Failures (insecure deserialization)
|
||||
# - Negative tests (no false positives on secure code)
|
||||
#
|
||||
# =============================================================================
|
||||
|
||||
skill: security-testing
|
||||
version: 1.0.0
|
||||
description: >
|
||||
Comprehensive evaluation suite for the security-testing skill.
|
||||
Tests OWASP Top 10 2021 detection capabilities, CWE classification accuracy,
|
||||
CVSS scoring, severity classification, and remediation quality.
|
||||
Supports multi-model testing and integrates with ReasoningBank for
|
||||
continuous improvement.
|
||||
|
||||
# =============================================================================
|
||||
# Multi-Model Configuration
|
||||
# =============================================================================
|
||||
|
||||
models_to_test:
|
||||
- claude-3.5-sonnet # Primary model (high accuracy expected)
|
||||
- claude-3-haiku # Fast model (minimum quality threshold)
|
||||
- gpt-4o # Cross-vendor validation
|
||||
|
||||
# =============================================================================
|
||||
# MCP Integration Configuration
|
||||
# =============================================================================
|
||||
|
||||
mcp_integration:
|
||||
enabled: true
|
||||
namespace: skill-validation
|
||||
|
||||
# Query existing security patterns before running evals
|
||||
query_patterns: true
|
||||
|
||||
# Track each test outcome for learning feedback loop
|
||||
track_outcomes: true
|
||||
|
||||
# Store successful patterns after evals complete
|
||||
store_patterns: true
|
||||
|
||||
# Share learning with fleet coordinator agents
|
||||
share_learning: true
|
||||
|
||||
# Update quality gate with validation metrics
|
||||
update_quality_gate: true
|
||||
|
||||
# Target agents for learning distribution
|
||||
target_agents:
|
||||
- qe-learning-coordinator
|
||||
- qe-queen-coordinator
|
||||
- qe-security-scanner
|
||||
- qe-security-auditor
|
||||
|
||||
# =============================================================================
|
||||
# ReasoningBank Learning Configuration
|
||||
# =============================================================================
|
||||
|
||||
learning:
|
||||
store_success_patterns: true
|
||||
store_failure_patterns: true
|
||||
pattern_ttl_days: 90
|
||||
min_confidence_to_store: 0.7
|
||||
cross_model_comparison: true
|
||||
|
||||
# =============================================================================
|
||||
# Result Format Configuration
|
||||
# =============================================================================
|
||||
|
||||
result_format:
|
||||
json_output: true
|
||||
markdown_report: true
|
||||
include_raw_output: false
|
||||
include_timing: true
|
||||
include_token_usage: true
|
||||
|
||||
# =============================================================================
|
||||
# Environment Setup
|
||||
# =============================================================================
|
||||
|
||||
setup:
  # Tools listed as prerequisites for the run.
  # NOTE(review): npm is annotated "optional" yet sits under required_tools;
  # confirm whether the runner treats a missing npm as fatal.
  required_tools:
    - jq  # JSON parsing (required)
    - npm  # Dependency audit (optional but recommended)

  # Quoted so every value stays a string (e.g. "true" is not a YAML boolean).
  environment_variables:
    SECURITY_SCAN_DEPTH: "deep"
    OWASP_ENABLED: "true"
    SEVERITY_THRESHOLD: "medium"

  # Deliberately vulnerable sample app: one flaw per route handler.
  fixtures:
    - name: vulnerable_express_app
      path: fixtures/vulnerable-express-app.js
      content: |
        const express = require('express');
        const app = express();

        // SQL Injection vulnerability
        // The route declares :id so req.params.id actually carries user input.
        app.get('/user/:id', (req, res) => {
          const query = `SELECT * FROM users WHERE id = ${req.params.id}`;
          db.query(query);
        });

        // XSS vulnerability
        app.get('/profile', (req, res) => {
          res.send(`<h1>Hello ${req.query.name}</h1>`);
        });

        // Path Traversal vulnerability
        app.get('/file', (req, res) => {
          const path = './uploads/' + req.query.filename;
          res.sendFile(path);
        });
|
||||
|
||||
# =============================================================================
|
||||
# TEST CASES
|
||||
# =============================================================================
|
||||
|
||||
test_cases:
|
||||
# ---------------------------------------------------------------------------
|
||||
# CATEGORY: SQL Injection (OWASP A03:2021)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
- id: tc001_sql_injection_string_concat
  # Positive case: a SQL string is built by interpolating a route parameter.
  description: "Detect SQL injection via string concatenation in Node.js"
  category: injection
  priority: critical

  input:
    # The route declares :id; without it req.params.id is undefined and the
    # snippet would not contain an attacker-controlled value at all.
    code: |
      const express = require('express');
      const mysql = require('mysql');
      const app = express();

      app.get('/api/users/:id', (req, res) => {
        const userId = req.params.id;
        const query = `SELECT * FROM users WHERE id = ${userId}`;
        db.query(query, (err, results) => {
          res.json(results);
        });
      });
    context:
      language: javascript
      framework: express
      environment: production

  expected_output:
    must_contain:
      - "SQL injection"
      - "parameterized"
    # A bare "secure" would also match "insecure" / "to secure the query",
    # wording a correct finding is likely to use, so the forbidden phrase is
    # narrowed to an explicit all-clear statement.
    must_not_contain:
      - "no vulnerabilities"
      - "code is secure"
    must_match_regex:
      - "CWE-89|CWE-564"
      # Pinned to the 2021 edition, matching the other test cases.
      - "A03:2021"
    severity_classification: critical
    finding_count:
      min: 1
      max: 3
    recommendation_count:
      min: 1

  validation:
    schema_check: true
    keyword_match_threshold: 0.8
    reasoning_quality_min: 0.7
    # Weights sum to 1.0.
    grading_rubric:
      completeness: 0.3
      accuracy: 0.5
      actionability: 0.2

  timeout_ms: 30000
|
||||
|
||||
- id: tc002_sql_injection_parameterized_safe
  # Negative case: the same lookup done with a bound parameter must not be flagged.
  description: "Verify parameterized queries are NOT flagged as vulnerable"
  category: injection
  priority: high

  input:
    # The route declares :id so parseInt() receives the path segment rather
    # than undefined (which would yield NaN).
    code: |
      app.get('/api/users/:id', (req, res) => {
        const userId = parseInt(req.params.id, 10);
        db.query('SELECT * FROM users WHERE id = ?', [userId], (err, results) => {
          res.json(results);
        });
      });
    context:
      language: javascript
      framework: express

  expected_output:
    must_contain:
      - "parameterized"
      - "secure"
    # NOTE(review): if keywords are matched as plain substrings, a correct
    # answer such as "parameterized queries prevent SQL injection" or
    # "not vulnerable" would trip these entries - confirm matcher semantics.
    must_not_contain:
      - "SQL injection"
      - "critical"
      - "vulnerable"
    severity_classification: info
    finding_count:
      max: 1

  validation:
    schema_check: true
    keyword_match_threshold: 0.7
    allow_partial: true
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CATEGORY: Cross-Site Scripting (OWASP A03:2021)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
- id: tc003_xss_reflected_html_output
|
||||
description: "Detect reflected XSS in unescaped HTML output"
|
||||
category: injection
|
||||
priority: critical
|
||||
|
||||
input:
|
||||
code: |
|
||||
app.get('/profile', (req, res) => {
|
||||
const name = req.query.name;
|
||||
res.send(`
|
||||
<html>
|
||||
<body>
|
||||
<h1>Welcome, ${name}!</h1>
|
||||
<p>Your profile has been loaded.</p>
|
||||
</body>
|
||||
</html>
|
||||
`);
|
||||
});
|
||||
context:
|
||||
language: javascript
|
||||
framework: express
|
||||
|
||||
expected_output:
|
||||
must_contain:
|
||||
- "XSS"
|
||||
- "cross-site scripting"
|
||||
- "sanitize"
|
||||
- "escape"
|
||||
must_match_regex:
|
||||
- "CWE-79"
|
||||
severity_classification: high
|
||||
finding_count:
|
||||
min: 1
|
||||
|
||||
validation:
|
||||
schema_check: true
|
||||
keyword_match_threshold: 0.8
|
||||
reasoning_quality_min: 0.75
|
||||
|
||||
- id: tc004_xss_dom_based_innerhtml
|
||||
description: "Detect DOM-based XSS via innerHTML assignment"
|
||||
category: injection
|
||||
priority: high
|
||||
|
||||
input:
|
||||
code: |
|
||||
// Client-side JavaScript
|
||||
const params = new URLSearchParams(window.location.search);
|
||||
const message = params.get('msg');
|
||||
document.getElementById('output').innerHTML = message;
|
||||
context:
|
||||
language: javascript
|
||||
framework: vanilla
|
||||
environment: production
|
||||
|
||||
expected_output:
|
||||
must_contain:
|
||||
- "DOM"
|
||||
- "XSS"
|
||||
- "innerHTML"
|
||||
- "textContent"
|
||||
must_match_regex:
|
||||
- "CWE-79"
|
||||
severity_classification: high
|
||||
|
||||
validation:
|
||||
schema_check: true
|
||||
keyword_match_threshold: 0.7
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CATEGORY: Authentication Failures (OWASP A07:2021)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
- id: tc005_hardcoded_credentials
|
||||
description: "Detect hardcoded credentials and API keys"
|
||||
category: authentication
|
||||
priority: critical
|
||||
|
||||
input:
|
||||
code: |
|
||||
const ADMIN_PASSWORD = 'admin123';
|
||||
const API_KEY = 'sk-1234567890abcdef';
|
||||
const DATABASE_URL = 'postgres://admin:password123@localhost/db';
|
||||
|
||||
app.post('/login', (req, res) => {
|
||||
if (req.body.password === ADMIN_PASSWORD) {
|
||||
req.session.isAdmin = true;
|
||||
res.send('Login successful');
|
||||
}
|
||||
});
|
||||
context:
|
||||
language: javascript
|
||||
framework: express
|
||||
|
||||
expected_output:
|
||||
must_contain:
|
||||
- "hardcoded"
|
||||
- "credentials"
|
||||
- "secret"
|
||||
- "environment variable"
|
||||
must_match_regex:
|
||||
- "CWE-798|CWE-259"
|
||||
severity_classification: critical
|
||||
finding_count:
|
||||
min: 2
|
||||
|
||||
validation:
|
||||
schema_check: true
|
||||
keyword_match_threshold: 0.8
|
||||
reasoning_quality_min: 0.8
|
||||
|
||||
- id: tc006_weak_password_hashing
|
||||
description: "Detect weak password hashing algorithms (MD5, SHA1)"
|
||||
category: authentication
|
||||
priority: high
|
||||
|
||||
input:
|
||||
code: |
|
||||
const crypto = require('crypto');
|
||||
|
||||
function hashPassword(password) {
|
||||
return crypto.createHash('md5').update(password).digest('hex');
|
||||
}
|
||||
|
||||
function verifyPassword(password, hash) {
|
||||
return hashPassword(password) === hash;
|
||||
}
|
||||
context:
|
||||
language: javascript
|
||||
framework: nodejs
|
||||
|
||||
expected_output:
|
||||
must_contain:
|
||||
- "MD5"
|
||||
- "weak"
|
||||
- "bcrypt"
|
||||
- "argon2"
|
||||
must_match_regex:
|
||||
- "CWE-327|CWE-328|CWE-916"
|
||||
severity_classification: high
|
||||
finding_count:
|
||||
min: 1
|
||||
|
||||
validation:
|
||||
schema_check: true
|
||||
keyword_match_threshold: 0.8
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CATEGORY: Broken Access Control (OWASP A01:2021)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
- id: tc007_idor_missing_authorization
|
||||
description: "Detect IDOR vulnerability with missing authorization check"
|
||||
category: authorization
|
||||
priority: critical
|
||||
|
||||
input:
|
||||
code: |
|
||||
app.get('/api/users/:id/profile', (req, res) => {
|
||||
// No authorization check - any user can access any profile
|
||||
const userId = req.params.id;
|
||||
db.query('SELECT * FROM profiles WHERE user_id = ?', [userId])
|
||||
.then(profile => res.json(profile));
|
||||
});
|
||||
|
||||
app.delete('/api/users/:id', (req, res) => {
|
||||
// No check if requesting user owns this account
|
||||
db.query('DELETE FROM users WHERE id = ?', [req.params.id]);
|
||||
res.send('User deleted');
|
||||
});
|
||||
context:
|
||||
language: javascript
|
||||
framework: express
|
||||
|
||||
expected_output:
|
||||
must_contain:
|
||||
- "authorization"
|
||||
- "access control"
|
||||
- "IDOR"
|
||||
- "ownership"
|
||||
must_match_regex:
|
||||
- "CWE-639|CWE-284|CWE-862"
|
||||
- "A01:2021"
|
||||
severity_classification: critical
|
||||
|
||||
validation:
|
||||
schema_check: true
|
||||
keyword_match_threshold: 0.7
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CATEGORY: Cryptographic Failures (OWASP A02:2021)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
- id: tc008_weak_encryption_des
|
||||
description: "Detect use of weak encryption algorithms (DES, RC4)"
|
||||
category: cryptography
|
||||
priority: high
|
||||
|
||||
input:
|
||||
code: |
|
||||
const crypto = require('crypto');
|
||||
|
||||
function encryptData(data, key) {
|
||||
const cipher = crypto.createCipher('des', key);
|
||||
return cipher.update(data, 'utf8', 'hex') + cipher.final('hex');
|
||||
}
|
||||
|
||||
function decryptData(data, key) {
|
||||
const decipher = crypto.createDecipher('des', key);
|
||||
return decipher.update(data, 'hex', 'utf8') + decipher.final('utf8');
|
||||
}
|
||||
context:
|
||||
language: javascript
|
||||
framework: nodejs
|
||||
|
||||
expected_output:
|
||||
must_contain:
|
||||
- "DES"
|
||||
- "weak"
|
||||
- "deprecated"
|
||||
- "AES"
|
||||
must_match_regex:
|
||||
- "CWE-327|CWE-328"
|
||||
- "A02:2021"
|
||||
severity_classification: high
|
||||
|
||||
validation:
|
||||
schema_check: true
|
||||
keyword_match_threshold: 0.7
|
||||
|
||||
- id: tc009_plaintext_password_storage
|
||||
description: "Detect plaintext password storage"
|
||||
category: cryptography
|
||||
priority: critical
|
||||
|
||||
input:
|
||||
code: |
|
||||
class User {
|
||||
constructor(email, password) {
|
||||
this.email = email;
|
||||
this.password = password; // Stored in plaintext!
|
||||
}
|
||||
|
||||
save() {
|
||||
db.query('INSERT INTO users (email, password) VALUES (?, ?)',
|
||||
[this.email, this.password]);
|
||||
}
|
||||
}
|
||||
context:
|
||||
language: javascript
|
||||
framework: nodejs
|
||||
|
||||
expected_output:
|
||||
must_contain:
|
||||
- "plaintext"
|
||||
- "password"
|
||||
- "hash"
|
||||
- "bcrypt"
|
||||
must_match_regex:
|
||||
- "CWE-256|CWE-312"
|
||||
- "A02:2021"
|
||||
severity_classification: critical
|
||||
|
||||
validation:
|
||||
schema_check: true
|
||||
keyword_match_threshold: 0.8
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CATEGORY: Path Traversal (Related to A01:2021)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
- id: tc010_path_traversal_file_access
  # Positive case: two handlers join user-supplied names onto a base directory.
  description: "Detect path traversal vulnerability in file access"
  category: injection
  priority: critical

  input:
    # /download takes the name from the query string; /read takes it from the
    # :name route segment, which must be declared for req.params.name to exist.
    code: |
      const fs = require('fs');

      app.get('/download', (req, res) => {
        const filename = req.query.file;
        const filepath = './uploads/' + filename;
        res.sendFile(filepath);
      });

      app.get('/read/:name', (req, res) => {
        const content = fs.readFileSync('./data/' + req.params.name);
        res.send(content);
      });
    context:
      language: javascript
      framework: express

  expected_output:
    must_contain:
      - "path traversal"
      - "directory traversal"
      - "../"
      - "sanitize"
    must_match_regex:
      - "CWE-22|CWE-23"
    severity_classification: critical

  validation:
    schema_check: true
    keyword_match_threshold: 0.7
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CATEGORY: Negative Tests (No False Positives)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
- id: tc011_secure_code_no_false_positives
|
||||
description: "Verify secure code is NOT flagged as vulnerable"
|
||||
category: negative
|
||||
priority: critical
|
||||
|
||||
input:
|
||||
code: |
|
||||
const express = require('express');
|
||||
const helmet = require('helmet');
|
||||
const rateLimit = require('express-rate-limit');
|
||||
const bcrypt = require('bcrypt');
|
||||
const validator = require('validator');
|
||||
|
||||
const app = express();
|
||||
app.use(helmet());
|
||||
app.use(rateLimit({ windowMs: 15 * 60 * 1000, max: 100 }));
|
||||
|
||||
app.post('/api/users', async (req, res) => {
|
||||
const { email, password } = req.body;
|
||||
|
||||
// Input validation
|
||||
if (!validator.isEmail(email)) {
|
||||
return res.status(400).json({ error: 'Invalid email' });
|
||||
}
|
||||
|
||||
// Secure password hashing
|
||||
const hashedPassword = await bcrypt.hash(password, 12);
|
||||
|
||||
// Parameterized query
|
||||
await db.query(
|
||||
'INSERT INTO users (email, password) VALUES ($1, $2)',
|
||||
[email, hashedPassword]
|
||||
);
|
||||
|
||||
res.status(201).json({ message: 'User created' });
|
||||
});
|
||||
context:
|
||||
language: javascript
|
||||
framework: express
|
||||
environment: production
|
||||
|
||||
expected_output:
|
||||
must_contain:
|
||||
- "secure"
|
||||
- "best practice"
|
||||
must_not_contain:
|
||||
- "SQL injection"
|
||||
- "XSS"
|
||||
- "critical vulnerability"
|
||||
- "high severity"
|
||||
finding_count:
|
||||
max: 2 # Allow informational findings only
|
||||
|
||||
validation:
|
||||
schema_check: true
|
||||
keyword_match_threshold: 0.6
|
||||
allow_partial: true
|
||||
|
||||
- id: tc012_secure_auth_implementation
|
||||
description: "Verify secure authentication is recognized as safe"
|
||||
category: negative
|
||||
priority: high
|
||||
|
||||
input:
|
||||
code: |
|
||||
const bcrypt = require('bcrypt');
|
||||
const jwt = require('jsonwebtoken');
|
||||
|
||||
async function login(email, password) {
|
||||
const user = await User.findByEmail(email);
|
||||
if (!user) {
|
||||
return { error: 'Invalid credentials' };
|
||||
}
|
||||
|
||||
const match = await bcrypt.compare(password, user.passwordHash);
|
||||
if (!match) {
|
||||
return { error: 'Invalid credentials' };
|
||||
}
|
||||
|
||||
const token = jwt.sign(
|
||||
{ userId: user.id },
|
||||
process.env.JWT_SECRET,
|
||||
{ expiresIn: '1h' }
|
||||
);
|
||||
|
||||
return { token };
|
||||
}
|
||||
context:
|
||||
language: javascript
|
||||
framework: nodejs
|
||||
|
||||
expected_output:
|
||||
must_contain:
|
||||
- "bcrypt"
|
||||
- "jwt"
|
||||
- "secure"
|
||||
must_not_contain:
|
||||
- "vulnerable"
|
||||
- "critical"
|
||||
- "hardcoded"
|
||||
severity_classification: info
|
||||
|
||||
validation:
|
||||
schema_check: true
|
||||
allow_partial: true
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CATEGORY: Python Security (Multi-language Support)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
- id: tc013_python_sql_injection
|
||||
description: "Detect SQL injection in Python Flask application"
|
||||
category: injection
|
||||
priority: critical
|
||||
|
||||
input:
|
||||
code: |
|
||||
from flask import Flask, request
|
||||
import sqlite3
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route('/user')
|
||||
def get_user():
|
||||
user_id = request.args.get('id')
|
||||
conn = sqlite3.connect('users.db')
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(f"SELECT * FROM users WHERE id = {user_id}")
|
||||
return str(cursor.fetchone())
|
||||
context:
|
||||
language: python
|
||||
framework: flask
|
||||
|
||||
expected_output:
|
||||
must_contain:
|
||||
- "SQL injection"
|
||||
- "parameterized"
|
||||
- "f-string"
|
||||
must_match_regex:
|
||||
- "CWE-89"
|
||||
severity_classification: critical
|
||||
finding_count:
|
||||
min: 1
|
||||
|
||||
validation:
|
||||
schema_check: true
|
||||
keyword_match_threshold: 0.7
|
||||
|
||||
- id: tc014_python_ssti_jinja
|
||||
description: "Detect Server-Side Template Injection in Jinja2"
|
||||
category: injection
|
||||
priority: critical
|
||||
|
||||
input:
|
||||
code: |
|
||||
from flask import Flask, request, render_template_string
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route('/render')
|
||||
def render():
|
||||
template = request.args.get('template')
|
||||
return render_template_string(template)
|
||||
context:
|
||||
language: python
|
||||
framework: flask
|
||||
|
||||
expected_output:
|
||||
must_contain:
|
||||
- "SSTI"
|
||||
- "template injection"
|
||||
- "render_template_string"
|
||||
- "Jinja2"
|
||||
must_match_regex:
|
||||
- "CWE-94|CWE-1336"
|
||||
severity_classification: critical
|
||||
|
||||
validation:
|
||||
schema_check: true
|
||||
keyword_match_threshold: 0.7
|
||||
|
||||
- id: tc015_python_pickle_deserialization
|
||||
description: "Detect insecure deserialization with pickle"
|
||||
category: injection
|
||||
priority: critical
|
||||
|
||||
input:
|
||||
code: |
|
||||
import pickle
|
||||
from flask import Flask, request
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route('/load')
|
||||
def load_data():
|
||||
data = request.get_data()
|
||||
obj = pickle.loads(data)
|
||||
return str(obj)
|
||||
context:
|
||||
language: python
|
||||
framework: flask
|
||||
|
||||
expected_output:
|
||||
must_contain:
|
||||
- "pickle"
|
||||
- "deserialization"
|
||||
- "untrusted"
|
||||
- "RCE"
|
||||
must_match_regex:
|
||||
- "CWE-502"
|
||||
- "A08:2021"
|
||||
severity_classification: critical
|
||||
|
||||
validation:
|
||||
schema_check: true
|
||||
keyword_match_threshold: 0.7
|
||||
|
||||
# =============================================================================
|
||||
# SUCCESS CRITERIA
|
||||
# =============================================================================
|
||||
|
||||
success_criteria:
|
||||
# Overall pass rate (90% of tests must pass)
|
||||
pass_rate: 0.9
|
||||
|
||||
# Critical tests must ALL pass (100%)
|
||||
critical_pass_rate: 1.0
|
||||
|
||||
# Average reasoning quality score
|
||||
avg_reasoning_quality: 0.75
|
||||
|
||||
# Maximum suite execution time (5 minutes)
|
||||
max_execution_time_ms: 300000
|
||||
|
||||
# Maximum variance between model results (15%)
|
||||
cross_model_variance: 0.15
|
||||
|
||||
# =============================================================================
|
||||
# METADATA
|
||||
# =============================================================================
|
||||
|
||||
metadata:
  author: "qe-security-auditor"
  # Dates are quoted so they load as strings, not date objects.
  created: "2026-02-02"
  last_updated: "2026-02-02"
  # Summary of what the 15 test cases above exercise. Command injection is
  # not listed because no test case in this suite covers it.
  coverage_target: >
    OWASP Top 10 2021: A01 (Broken Access Control), A02 (Cryptographic Failures),
    A03 (Injection - SQL, XSS, SSTI), A07 (Authentication Failures),
    A08 (Software Integrity - Deserialization). Covers JavaScript/Node.js
    Express apps and Python Flask apps. 15 test cases with 90% pass rate
    requirement and 100% critical pass rate.
|
||||
Reference in New Issue
Block a user