-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathscanner.go
More file actions
1187 lines (1055 loc) · 39.1 KB
/
scanner.go
File metadata and controls
1187 lines (1055 loc) · 39.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2026 GoSQLX Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package security provides SQL injection detection and security scanning for GoSQLX.
//
// The primary entry points are NewScanner (creates a scanner that reports all severity
// levels), NewScannerWithSeverity (creates a scanner filtered to a minimum severity),
// Scanner.Scan (analyzes a parsed *ast.AST via deep tree traversal), and Scanner.ScanSQL
// (analyzes a raw SQL string using pre-compiled regex patterns). Both methods return a
// *ScanResult containing all Findings with severity, pattern type, risk description, and
// remediation suggestions, plus summary counts accessible via HasCritical(),
// HasHighOrAbove(), and IsClean().
//
// # Overview
//
// The security scanner performs static analysis on SQL to detect potential
// injection attacks and unsafe patterns. It uses a combination of AST traversal,
// pattern matching, and heuristic analysis to identify security issues.
//
// # Pattern Detection
//
// The scanner detects 8 types of SQL injection patterns:
//
// - TAUTOLOGY: Always-true conditions (1=1, 'a'='a') used to bypass authentication
// - COMMENT_BYPASS: Comment-based injection (--, /**/, #) to bypass validation
// - UNION_BASED: UNION SELECT patterns for data extraction and schema enumeration
// - STACKED_QUERY: Multiple statements with destructive operations (DROP, DELETE)
// - TIME_BASED: Time delay functions (SLEEP, WAITFOR, pg_sleep) for blind injection
// - OUT_OF_BAND: External data exfiltration (xp_cmdshell, LOAD_FILE, UTL_HTTP)
// - DANGEROUS_FUNCTION: Dynamic SQL execution (EXEC, sp_executesql, PREPARE FROM)
// - BOOLEAN_BASED: Conditional logic exploitation for data extraction
//
// # Severity Levels
//
// Each finding is assigned one of four severity levels:
//
// - CRITICAL: Definite injection pattern detected (e.g., OR 1=1 --)
// - HIGH: Highly suspicious patterns requiring immediate review
// - MEDIUM: Potentially unsafe patterns that need investigation
// - LOW: Informational findings and best practice violations
//
// # Basic Usage
//
// AST-based scanning:
//
// import (
// "github.com/ajitpratap0/GoSQLX/pkg/sql/parser"
// "github.com/ajitpratap0/GoSQLX/pkg/sql/security"
// )
//
// // Parse SQL into AST
// ast, err := parser.Parse(tokens)
// if err != nil {
// log.Fatal(err)
// }
//
// // Scan for security issues
// scanner := security.NewScanner()
// results := scanner.Scan(ast)
//
// // Review findings
// for _, finding := range results.Findings {
// fmt.Printf("[%s] %s: %s\n",
// finding.Severity,
// finding.Pattern,
// finding.Description)
// }
//
// Raw SQL scanning:
//
// scanner := security.NewScanner()
// results := scanner.ScanSQL("SELECT * FROM users WHERE id = 1 OR 1=1 --")
//
// if results.HasCritical() {
// fmt.Println("CRITICAL security issues found!")
// for _, f := range results.Findings {
// fmt.Printf(" - %s: %s\n", f.Pattern, f.Description)
// fmt.Printf(" Risk: %s\n", f.Risk)
// fmt.Printf(" Suggestion: %s\n", f.Suggestion)
// }
// }
//
// # Filtering by Severity
//
// Filter findings by minimum severity level:
//
// // Only report HIGH and CRITICAL findings
// scanner, err := security.NewScannerWithSeverity(security.SeverityHigh)
// if err != nil {
// log.Fatal(err)
// }
//
// results := scanner.Scan(ast)
// fmt.Printf("Found %d high-severity issues\n", results.HighCount + results.CriticalCount)
//
// # Scan Results
//
// The ScanResult structure provides comprehensive information:
//
// results := scanner.Scan(ast)
//
// fmt.Printf("Total findings: %d\n", results.TotalCount)
// fmt.Printf("Critical: %d, High: %d, Medium: %d, Low: %d\n",
// results.CriticalCount,
// results.HighCount,
// results.MediumCount,
// results.LowCount)
//
// // Check severity thresholds
// if results.IsClean() {
// fmt.Println("No security issues detected")
// }
//
// if results.HasHighOrAbove() {
// fmt.Println("High-priority security issues require attention")
// }
//
// # Finding Details
//
// Each Finding contains detailed information:
//
// for _, finding := range results.Findings {
// fmt.Printf("Pattern: %s\n", finding.Pattern) // Pattern type
// fmt.Printf("Severity: %s\n", finding.Severity) // Risk level
// fmt.Printf("Description: %s\n", finding.Description) // What was found
// fmt.Printf("Risk: %s\n", finding.Risk) // Security impact
// fmt.Printf("Suggestion: %s\n", finding.Suggestion) // Remediation advice
// if finding.Line > 0 {
// fmt.Printf("Location: Line %d, Column %d\n", finding.Line, finding.Column)
// }
// }
//
// # Performance Considerations
//
// The scanner uses pre-compiled regex patterns (compiled once, on the first call to
// NewScanner) for optimal performance. Scanning is thread-safe and suitable for concurrent use.
//
// # Production Integration
//
// Example CI/CD integration:
//
// scanner := security.NewScanner()
// results := scanner.ScanSQL(userProvidedSQL)
//
// if results.HasCritical() {
// // Block deployment
// log.Fatal("CRITICAL security vulnerabilities detected")
// }
//
// if results.HasHighOrAbove() {
// // Require security review
// fmt.Println("WARNING: High-severity security issues require review")
// }
//
// # Pattern Examples
//
// TAUTOLOGY detection:
//
// "SELECT * FROM users WHERE username='admin' OR 1=1 --"
// → CRITICAL: Always-true condition detected
//
// UNION_BASED detection:
//
// "SELECT name FROM products UNION SELECT password FROM users"
// → CRITICAL: UNION-based data extraction
//
// TIME_BASED detection:
//
// "SELECT * FROM orders WHERE id=1 AND SLEEP(5)"
// → HIGH: Time-based blind injection
//
// STACKED_QUERY detection:
//
// "SELECT * FROM users; DROP TABLE users --"
// → CRITICAL: Stacked query with destructive operation
//
// # Version
//
// This package is part of GoSQLX v1.6.0 and is production-ready for enterprise use.
package security
import (
"fmt"
"regexp"
"strings"
"sync"
"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
)
// Severity is the risk level attached to a security finding. The scanner uses
// it both to label findings and to filter them against a minimum threshold.
type Severity string

// Recognized severity levels, listed from most to least urgent.
const (
	// SeverityCritical marks a definite injection pattern (e.g., OR 1=1 --).
	SeverityCritical = Severity("CRITICAL")
	// SeverityHigh marks a likely injection (suspicious pattern).
	SeverityHigh = Severity("HIGH")
	// SeverityMedium marks a potentially unsafe pattern that needs review.
	SeverityMedium = Severity("MEDIUM")
	// SeverityLow marks an informational finding.
	SeverityLow = Severity("LOW")
)
// severityOrder assigns each recognized severity a numeric rank so that levels
// can be compared; a larger rank means a more severe finding. Membership in
// this map is also what isValidSeverity uses to decide whether a severity is
// recognized.
// NOTE(review): how a severity that is missing from this map is treated is
// decided by the code that reads the map, not by this declaration — confirm there.
var severityOrder = map[Severity]int{
	SeverityCritical: 3,
	SeverityHigh:     2,
	SeverityMedium:   1,
	SeverityLow:      0,
}
// Package-level regex tables. Every table is paired with a sync.Once so that
// its (comparatively expensive) compilation happens a single time; NewScanner
// is what triggers the three initializers.

// compiledPatterns maps a pattern category to the regexes that detect it.
// It is filled in by initCompiledPatterns.
var (
	compiledPatternsOnce sync.Once
	compiledPatterns     map[PatternType][]*regexp.Regexp
)

// commentPatterns lists the comment-based detection rules: the regex to match,
// the description to report, and the severity to assign. It is filled in by
// initCommentPatterns.
var (
	commentPatternsOnce sync.Once
	commentPatterns     []struct {
		re          *regexp.Regexp
		description string
		severity    Severity
	}
)

// tautologyCapturePatterns holds the regexes that capture candidate equality
// pairs in raw SQL. Go's RE2 engine has no backreferences, so a regex alone
// cannot require both sides to be equal; the patterns only capture the operands
// and the comparison is done in code afterwards. It is filled in by
// initTautologyCapturePatterns.
var (
	tautologyCaptureOnce     sync.Once
	tautologyCapturePatterns []*regexp.Regexp
)
// initCompiledPatterns builds the compiledPatterns table, compiling every
// category's expressions. It is meant to run once (NewScanner invokes it
// through compiledPatternsOnce). A malformed expression makes
// regexp.MustCompile panic.
func initCompiledPatterns() {
	// compileAll compiles the given expressions in order.
	compileAll := func(exprs ...string) []*regexp.Regexp {
		compiled := make([]*regexp.Regexp, 0, len(exprs))
		for _, expr := range exprs {
			compiled = append(compiled, regexp.MustCompile(expr))
		}
		return compiled
	}

	compiledPatterns = make(map[PatternType][]*regexp.Regexp)

	// Delay functions used for time-based blind injection.
	compiledPatterns[PatternTimeBased] = compileAll(
		`(?i)\bSLEEP\s*\(`,
		`(?i)\bWAITFOR\s+DELAY\b`,
		`(?i)\bpg_sleep\s*\(`,
		`(?i)\bBENCHMARK\s*\(`,
		`(?i)\bDBMS_LOCK\.SLEEP\s*\(`,
	)

	// Out-of-band channels: command execution, file access, network packages.
	compiledPatterns[PatternOutOfBand] = compileAll(
		`(?i)\bxp_cmdshell\b`,
		`(?i)\bLOAD_FILE\s*\(`,
		`(?i)\bINTO\s+OUTFILE\b`,
		`(?i)\bINTO\s+DUMPFILE\b`,
		`(?i)\bUTL_HTTP\b`,
		`(?i)\bDBMS_LDAP\b`,
		`(?i)\bEXEC\s+master\b`,
		`(?i)\bsp_oacreate\b`,
	)

	// Dynamic SQL execution constructs.
	compiledPatterns[PatternDangerousFunc] = compileAll(
		`(?i)\bEXEC\s*\(`,
		`(?i)\bEXECUTE\s+IMMEDIATE\b`,
		`(?i)\bsp_executesql\b`,
		`(?i)\bPREPARE\s+\w+\s+FROM\b`,
	)

	// Tautologies expressible as a plain regex: only "OR TRUE" lives here.
	// Equality tautologies (1=1, 'a'='a') need both sides compared, which RE2
	// cannot do without backreferences, so ScanSQL handles them through
	// detectTautologyInSQL instead.
	compiledPatterns[PatternTautology] = compileAll(
		`(?i)\bOR\s+TRUE\b`,
	)

	// UNION injection fingerprints: system catalog access after a UNION,
	// NULL-padded column lists, and bare information_schema references.
	compiledPatterns[PatternUnionInjection] = compileAll(
		// UNION [ALL] SELECT ... followed by a system schema name.
		`(?i)UNION\s+(ALL\s+)?SELECT.*\b(information_schema|pg_catalog|mysql\b|sys\.)\b`,
		// UNION [ALL] SELECT NULL, NULL, ... (two or more NULLs).
		`(?i)UNION\s+(ALL\s+)?SELECT\s+(?:NULL,?\s*){2,}`,
		// information_schema anywhere in the statement.
		`(?i)\binformation_schema\b`,
	)

	// Any UNION [ALL] SELECT, which may be legitimate.
	compiledPatterns[PatternUnionGeneric] = compileAll(
		`(?i)\bUNION\s+(ALL\s+)?SELECT\b`,
	)

	// A semicolon followed by a data-changing or executing statement.
	compiledPatterns[PatternStackedQuery] = compileAll(
		`;\s*(?i)(DROP|DELETE|TRUNCATE|UPDATE|INSERT|ALTER)\b`,
		`;\s*(?i)EXEC\b`,
		`;\s*(?i)EXECUTE\b`,
	)
}
// initCommentPatterns builds the commentPatterns rule list. Each rule pairs a
// compiled regex with the description and severity reported when it matches.
// It is meant to run once (NewScanner invokes it through commentPatternsOnce).
func initCommentPatterns() {
	commentPatterns = []struct {
		re          *regexp.Regexp
		description string
		severity    Severity
	}{
		{
			// "--" with nothing but whitespace after it, at the end of the input.
			re:          regexp.MustCompile(`--\s*$`),
			description: "Trailing SQL comment may indicate injection",
			severity:    SeverityMedium,
		},
		{
			// "--" followed by a quote, closing parenthesis or closing bracket.
			re:          regexp.MustCompile(`--\s*['")\]]`),
			description: "Comment after quote/bracket (potential bypass)",
			severity:    SeverityHigh,
		},
		{
			// A /* ... */ block comment that ends the input.
			re:          regexp.MustCompile(`/\*[^*]*\*+(?:[^/*][^*]*\*+)*/\s*$`),
			description: "Unclosed or trailing block comment may indicate injection",
			severity:    SeverityMedium,
		},
		{
			// A /*! ... */ comment.
			re:          regexp.MustCompile(`/\*!.*\*/`),
			description: "MySQL conditional comment (version-specific execution)",
			severity:    SeverityMedium,
		},
		{
			// "#" with nothing but whitespace after it, at the end of the input.
			re:          regexp.MustCompile(`#\s*$`),
			description: "Hash comment at end (MySQL)",
			severity:    SeverityMedium,
		},
		{
			// ";" followed (after optional whitespace) by "--".
			re:          regexp.MustCompile(`;\s*--`),
			description: "Statement terminator followed by comment",
			severity:    SeverityHigh,
		},
	}
}
// maxRegexInputLen is the largest number of bytes of input that
// safeRegexMatch passes to a regular expression. It exists to bound the work
// done by patterns with nested quantifiers (e.g. the block-comment pattern).
const maxRegexInputLen = 10_000

// safeRegexMatch reports whether re matches s, considering only the first
// maxRegexInputLen bytes of s. Anything past that limit is ignored, so a match
// located entirely beyond the limit is not reported, and end-of-input anchors
// are evaluated against the truncated text.
func safeRegexMatch(re *regexp.Regexp, s string) bool {
	limit := len(s)
	if limit > maxRegexInputLen {
		limit = maxRegexInputLen
	}
	return re.MatchString(s[:limit])
}
// initTautologyCapturePatterns builds tautologyCapturePatterns, the regexes
// that locate "X = Y" candidates in raw SQL. Every pattern has two capture
// groups, one per side of the '='. RE2 offers no backreferences, so the
// patterns cannot themselves require X and Y to be identical; that comparison
// is left to the code that consumes the captures. All repetitions are bounded.
// It is meant to run once (NewScanner invokes it through tautologyCaptureOnce).
func initTautologyCapturePatterns() {
	const (
		// Digit runs of 1-6 characters on both sides.
		numericPair = `(?i)\b(\d{1,6})\s*=\s*(\d{1,6})\b`
		// Single-quoted literals of at most 50 characters on both sides.
		stringPair = `(?i)('[^']{0,50}')\s*=\s*('[^']{0,50}')`
		// Identifiers (letters, digits, underscores, dots) on both sides.
		identifierPair = `(?i)\b([a-z_][a-z0-9_.]{0,50})\s*=\s*([a-z_][a-z0-9_.]{0,50})\b`
	)

	tautologyCapturePatterns = []*regexp.Regexp{
		regexp.MustCompile(numericPair),
		regexp.MustCompile(stringPair),
		regexp.MustCompile(identifierPair),
	}
}
// Name tables consulted by isSystemTable. Entries are lower-case because
// isSystemTable lower-cases the table name before comparing.
var (
	// systemTablePrefixes lists prefixes that mark a table name as belonging
	// to a system schema or catalog (e.g. "information_schema.tables").
	systemTablePrefixes = []string{
		"information_schema.",
		"sys.",
		"mysql.",
		"pg_catalog.",
		"pg_",
		"sqlite_",
		"master.dbo.",
		"msdb.",
		"tempdb.",
	}

	// systemTableNames lists names that are system schemas when they appear
	// on their own, without a trailing qualifier.
	systemTableNames = []string{
		"information_schema",
		"pg_catalog",
		"sys",
	}
)
// PatternType names the class of SQL injection pattern a finding belongs to.
// Each value identifies one attack vector or vulnerability category.
type PatternType string

// Pattern categories reported by the scanner.
const (
	// PatternTautology covers always-true conditions (1=1, 'a'='a') used to
	// bypass authentication.
	PatternTautology = PatternType("TAUTOLOGY")
	// PatternComment covers comment-based injection (--, /**/, #) used to
	// bypass validation.
	PatternComment = PatternType("COMMENT_BYPASS")
	// PatternStackedQuery covers multiple statements with destructive
	// operations (DROP, DELETE).
	PatternStackedQuery = PatternType("STACKED_QUERY")
	// PatternUnionBased covers UNION SELECT patterns for data extraction and
	// schema enumeration.
	PatternUnionBased = PatternType("UNION_BASED")
	// PatternUnionInjection covers UNION SELECT patterns carrying injection
	// fingerprints (system table access or NULL-column padding). ScanSQL uses
	// it as a CRITICAL severity signal.
	PatternUnionInjection = PatternType("UNION_INJECTION")
	// PatternUnionGeneric covers any UNION SELECT pattern. HIGH severity - it
	// may be legitimate. ScanSQL uses it to flag generic UNION SELECT for review.
	PatternUnionGeneric = PatternType("UNION_GENERIC")
	// PatternTimeBased covers time delay functions (SLEEP, WAITFOR, pg_sleep)
	// used for blind injection.
	PatternTimeBased = PatternType("TIME_BASED")
	// PatternBooleanBased covers conditional logic exploited for data extraction.
	PatternBooleanBased = PatternType("BOOLEAN_BASED")
	// PatternOutOfBand covers external data exfiltration (xp_cmdshell,
	// LOAD_FILE, UTL_HTTP).
	PatternOutOfBand = PatternType("OUT_OF_BAND")
	// PatternDangerousFunc covers dynamic SQL execution (EXEC, sp_executesql,
	// PREPARE FROM).
	PatternDangerousFunc = PatternType("DANGEROUS_FUNCTION")
)
// Finding represents a single security finding from the scanner.
// It contains detailed information about a detected vulnerability including
// severity, pattern type, location, and remediation suggestions.
//
// The struct tags define the JSON form: Severity, Pattern, Description and Risk
// are always emitted, while Line, Column, SQL and Suggestion are tagged
// omitempty and are left out of the JSON when they hold their zero value.
type Finding struct {
// Severity indicates the risk level (CRITICAL, HIGH, MEDIUM, LOW).
Severity Severity `json:"severity"`
// Pattern indicates the type of injection pattern detected.
Pattern PatternType `json:"pattern"`
// Description provides a human-readable explanation of what was found.
Description string `json:"description"`
// Risk describes the potential security impact.
Risk string `json:"risk"`
// Line is the line number where the issue was detected (if available).
// The package documentation treats a value greater than zero as "known".
Line int `json:"line,omitempty"`
// Column is the column number where the issue was detected (if available).
Column int `json:"column,omitempty"`
// SQL contains the problematic SQL fragment (if available).
SQL string `json:"sql,omitempty"`
// Suggestion provides remediation advice.
Suggestion string `json:"suggestion,omitempty"`
}
// ScanResult contains all findings from a security scan along with summary statistics.
// Use the helper methods HasCritical(), HasHighOrAbove(), and IsClean() to
// quickly assess the scan results.
//
// Scan and ScanSQL create the result with a non-nil, empty Findings slice and
// call updateCounts on it before returning.
// NOTE(review): the count fields are presumably filled in by updateCounts, which
// is defined elsewhere in the file — confirm there.
type ScanResult struct {
// Findings contains all detected security issues.
Findings []Finding `json:"findings"`
// TotalCount is the total number of findings across all severity levels.
TotalCount int `json:"total_count"`
// CriticalCount is the number of CRITICAL severity findings.
CriticalCount int `json:"critical_count"`
// HighCount is the number of HIGH severity findings.
HighCount int `json:"high_count"`
// MediumCount is the number of MEDIUM severity findings.
MediumCount int `json:"medium_count"`
// LowCount is the number of LOW severity findings.
LowCount int `json:"low_count"`
}
// Scanner performs security analysis on SQL ASTs and raw SQL strings.
// It detects SQL injection patterns using a combination of AST traversal,
// regex pattern matching, and heuristic analysis.
//
// Scanner is safe for concurrent use from multiple goroutines as it uses
// pre-compiled patterns and maintains no mutable state during scanning.
//
// Create a Scanner with NewScanner or NewScannerWithSeverity: NewScanner is
// what triggers the one-time compilation of the package-level regex tables.
// NOTE(review): a Scanner built as a bare struct literal skips that step, so
// the raw-SQL detectors may find the tables unset — confirm before relying on it.
//
// Example usage:
//
// scanner := security.NewScanner()
// results := scanner.Scan(ast)
// if results.HasCritical() {
// log.Fatal("Critical security issues detected")
// }
type Scanner struct {
// MinSeverity filters findings below this severity level.
// Only findings with severity >= MinSeverity are included in results.
// NewScanner sets it to SeverityLow; NewScannerWithSeverity sets it to the
// validated level it was given.
MinSeverity Severity
}
// NewScanner returns a scanner with default settings: MinSeverity is
// SeverityLow, so no finding is filtered out by severity.
//
// The first call also builds the package-level regex tables. Each table is
// guarded by its own sync.Once, so later calls (from any goroutine) reuse the
// tables already built.
//
// Example:
//
//	scanner := security.NewScanner()
//	results := scanner.Scan(ast)
func NewScanner() *Scanner {
	// One-time, package-wide pattern compilation.
	compiledPatternsOnce.Do(initCompiledPatterns)
	commentPatternsOnce.Do(initCommentPatterns)
	tautologyCaptureOnce.Do(initTautologyCapturePatterns)

	scanner := new(Scanner)
	scanner.MinSeverity = SeverityLow
	return scanner
}
// NewScannerWithSeverity returns a scanner whose MinSeverity is minSeverity,
// so that only findings at or above that level are reported.
//
// minSeverity must be one of SeverityLow, SeverityMedium, SeverityHigh or
// SeverityCritical. Any other value yields a nil scanner and an error naming
// the rejected value.
//
// Example:
//
//	// Only report HIGH and CRITICAL findings
//	scanner, err := security.NewScannerWithSeverity(security.SeverityHigh)
//	if err != nil {
//		log.Fatal(err)
//	}
//	results := scanner.Scan(ast)
func NewScannerWithSeverity(minSeverity Severity) (*Scanner, error) {
	// Reject unrecognized levels before building anything.
	if recognized := isValidSeverity(minSeverity); !recognized {
		return nil, fmt.Errorf("invalid severity level: %s", minSeverity)
	}

	scanner := NewScanner()
	scanner.MinSeverity = minSeverity
	return scanner, nil
}
// isValidSeverity reports whether severity is one of the recognized levels,
// i.e. whether it has an entry in severityOrder.
func isValidSeverity(severity Severity) bool {
	if _, known := severityOrder[severity]; known {
		return true
	}
	return false
}
// Scan inspects a parsed SQL AST for injection patterns by passing every
// top-level statement of tree to scanStatement, then calls updateCounts on the
// result.
//
// A nil tree yields an empty result. The returned Findings slice is never nil.
// Scan does not assign to any field of the Scanner.
//
// Example:
//
//	ast, err := parser.Parse(tokens)
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	scanner := security.NewScanner()
//	results := scanner.Scan(ast)
//
//	fmt.Printf("Found %d security issues\n", results.TotalCount)
//	for _, finding := range results.Findings {
//		fmt.Printf("[%s] %s\n", finding.Severity, finding.Description)
//	}
func (s *Scanner) Scan(tree *ast.AST) *ScanResult {
	report := &ScanResult{Findings: make([]Finding, 0)}

	if tree != nil {
		for _, statement := range tree.Statements {
			s.scanStatement(statement, report)
		}
		// Refresh the summary counters once all statements are scanned.
		s.updateCounts(report)
	}

	return report
}
// ScanSQL inspects a raw SQL string for injection patterns using regex-based
// detection. It is useful for patterns that are not visible in an AST, such as
// SQL comments, or when no parsed AST is available.
//
// The input is first passed through stripDollarQuotedStrings, and the
// detectors then run on the stripped text in this order:
//   - Comment-based injection (--, /**/, #)
//   - Tautologies: the OR TRUE regex, then equality pairs via detectTautologyInSQL
//   - Time-based blind injection (SLEEP, WAITFOR, pg_sleep, BENCHMARK)
//   - Out-of-band data exfiltration (xp_cmdshell, LOAD_FILE, UTL_HTTP)
//   - Dangerous functions (EXEC, sp_executesql, PREPARE FROM)
//   - UNION injection fingerprints (system tables, NULL padding, information_schema)
//   - Generic UNION SELECT
//   - Stacked query injection (semicolon-separated destructive statements)
//
// The returned Findings slice is never nil. ScanSQL does not assign to any
// field of the Scanner.
//
// Example:
//
//	scanner := security.NewScanner()
//	results := scanner.ScanSQL("SELECT * FROM users WHERE id = 1 OR 1=1 --")
//
//	if results.HasCritical() {
//		fmt.Println("CRITICAL security issue detected!")
//		for _, finding := range results.Findings {
//			fmt.Printf("  %s: %s\n", finding.Pattern, finding.Description)
//		}
//	}
func (s *Scanner) ScanSQL(sql string) *ScanResult {
	report := &ScanResult{Findings: make([]Finding, 0)}

	// Dollar-quoted string content is removed up front to prevent false positives.
	stripped := stripDollarQuotedStrings(sql)

	// Comment-based bypass.
	s.detectCommentPatterns(stripped, report)

	// Tautologies. RE2 has no backreferences, so the regex pass only covers
	// OR TRUE; equality pairs are captured and compared in code.
	s.detectRegexPatterns(stripped, PatternTautology, report)
	s.detectTautologyInSQL(stripped, report)

	// Remaining regex-driven categories.
	s.detectRegexPatterns(stripped, PatternTimeBased, report)
	s.detectRegexPatterns(stripped, PatternOutOfBand, report)
	s.detectRegexPatterns(stripped, PatternDangerousFunc, report)
	s.detectRegexPatterns(stripped, PatternUnionInjection, report)
	s.detectRegexPatterns(stripped, PatternUnionGeneric, report)
	s.detectRegexPatterns(stripped, PatternStackedQuery, report)

	// Refresh the summary counters once all detectors have run.
	s.updateCounts(report)
	return report
}
// scanStatement routes stmt to the scanner for its concrete statement type.
// SELECT, INSERT, UPDATE, DELETE and set operations are handled; any other
// statement type (or a nil statement) is ignored.
func (s *Scanner) scanStatement(stmt ast.Statement, result *ScanResult) {
	switch node := stmt.(type) {
	case *ast.SetOperation:
		s.scanSetOperation(node, result)
	case *ast.SelectStatement:
		s.scanSelectStatement(node, result)
	case *ast.InsertStatement:
		s.scanInsertStatement(node, result)
	case *ast.UpdateStatement:
		s.scanUpdateStatement(node, result)
	case *ast.DeleteStatement:
		s.scanDeleteStatement(node, result)
	}
}
// scanSelectStatement scans a SELECT: the WHERE and HAVING expressions (when
// present) go through scanExpression, and every selected column goes through
// scanExpressionForDangerousFunctions.
func (s *Scanner) scanSelectStatement(stmt *ast.SelectStatement, result *ScanResult) {
	if where := stmt.Where; where != nil {
		s.scanExpression(where, result, "WHERE clause")
	}

	if having := stmt.Having; having != nil {
		s.scanExpression(having, result, "HAVING clause")
	}

	for _, column := range stmt.Columns {
		s.scanExpressionForDangerousFunctions(column, result)
	}
}
// scanInsertStatement scans an INSERT by passing every value of every row in
// stmt.Values to scanExpressionForDangerousFunctions.
func (s *Scanner) scanInsertStatement(stmt *ast.InsertStatement, result *ScanResult) {
	for _, rowValues := range stmt.Values {
		for _, value := range rowValues {
			s.scanExpressionForDangerousFunctions(value, result)
		}
	}
}
// scanUpdateStatement scans an UPDATE: the WHERE expression (when present)
// goes through scanExpression, and the value of every SET assignment goes
// through scanExpressionForDangerousFunctions.
func (s *Scanner) scanUpdateStatement(stmt *ast.UpdateStatement, result *ScanResult) {
	if where := stmt.Where; where != nil {
		s.scanExpression(where, result, "WHERE clause")
	}

	for _, set := range stmt.Assignments {
		s.scanExpressionForDangerousFunctions(set.Value, result)
	}
}
// scanDeleteStatement scans a DELETE by passing its WHERE expression, when
// present, to scanExpression.
func (s *Scanner) scanDeleteStatement(stmt *ast.DeleteStatement, result *ScanResult) {
	where := stmt.Where
	if where == nil {
		return
	}
	s.scanExpression(where, result, "WHERE clause")
}
// scanSetOperation scans a set operation (UNION/EXCEPT/INTERSECT). When the
// operator is UNION (compared case-insensitively) the statement is first
// checked for UNION-based injection; then the left and right operands are
// scanned recursively, left first.
func (s *Scanner) scanSetOperation(stmt *ast.SetOperation, result *ScanResult) {
	if operator := strings.ToUpper(stmt.Operator); operator == "UNION" {
		s.checkUnionInjection(stmt, result)
	}

	// Both operands are statements themselves and may be absent.
	for _, operand := range []ast.Statement{stmt.Left, stmt.Right} {
		if operand != nil {
			s.scanStatement(operand, result)
		}
	}
}
// scanExpression routes expr to the scanner for its concrete expression type.
// Binary expressions and function calls have dedicated scanners; a unary
// expression is unwrapped and its operand scanned with the same context.
// A nil expression, or any other expression type, is ignored.
func (s *Scanner) scanExpression(expr ast.Expression, result *ScanResult, context string) {
	switch node := expr.(type) {
	case nil:
		return
	case *ast.BinaryExpression:
		s.scanBinaryExpression(node, result, context)
	case *ast.FunctionCall:
		s.scanFunctionCall(node, result)
	case *ast.UnaryExpression:
		if operand := node.Expr; operand != nil {
			s.scanExpression(operand, result, context)
		}
	}
}
// scanBinaryExpression scans one binary expression and then its operands.
//
// In order, it: records a CRITICAL tautology finding when the expression
// itself is a tautology (subject to shouldInclude); runs checkOrInjection when
// the operator is OR (compared case-insensitively); and scans the left and
// right operands via scanExpression with the same context. A nil expr is ignored.
func (s *Scanner) scanBinaryExpression(expr *ast.BinaryExpression, result *ScanResult, context string) {
	if expr == nil {
		return
	}

	// The expression itself is always true.
	if s.isTautology(expr) && s.shouldInclude(SeverityCritical) {
		result.Findings = append(result.Findings, Finding{
			Severity:    SeverityCritical,
			Pattern:     PatternTautology,
			Description: "Always-true condition detected (tautology)",
			Risk:        "Authentication bypass, data extraction",
			Suggestion:  "Remove or replace with proper condition",
		})
	}

	// An OR may carry a tautology in either operand.
	if operator := strings.ToUpper(expr.Operator); operator == "OR" {
		s.checkOrInjection(expr, result)
	}

	// Descend into both operands, left first.
	s.scanExpression(expr.Left, result, context)
	s.scanExpression(expr.Right, result, context)
}
// isTautology reports whether expr is an equality that is always true.
//
// Only the operators "=" and "==" qualify. The expression is a tautology when
// both operands are literals whose values have the same %v rendering (1=1,
// 'a'='a'), or when both operands are identifiers with the same Name (col=col).
// A nil expr is not a tautology.
//
// NOTE(review): identifiers are compared by Name only; if the identifier node
// also carries a table qualifier, that qualifier is not considered here — verify
// against the ast package.
func (s *Scanner) isTautology(expr *ast.BinaryExpression) bool {
	if expr == nil {
		return false
	}

	switch strings.ToUpper(expr.Operator) {
	case "=", "==":
		// Equality operator: keep going.
	default:
		return false
	}

	// Literal on both sides: compare the formatted values.
	if lhs, ok := expr.Left.(*ast.LiteralValue); ok {
		if rhs, ok := expr.Right.(*ast.LiteralValue); ok {
			if fmt.Sprintf("%v", lhs.Value) == fmt.Sprintf("%v", rhs.Value) {
				return true
			}
		}
	}

	// Identifier on both sides: compare the names.
	if lhs, ok := expr.Left.(*ast.Identifier); ok {
		if rhs, ok := expr.Right.(*ast.Identifier); ok {
			return lhs.Name == rhs.Name
		}
	}

	return false
}
// checkOrInjection looks for a tautology directly on either side of an OR
// expression. The right operand is examined first, then the left; each operand
// that is a binary expression and a tautology produces one CRITICAL finding
// (subject to shouldInclude).
func (s *Scanner) checkOrInjection(expr *ast.BinaryExpression, result *ScanResult) {
	operands := []struct {
		side        ast.Expression
		description string
	}{
		{expr.Right, "OR condition with tautology detected (e.g., OR 1=1)"},
		{expr.Left, "OR condition with tautology detected"},
	}

	for _, operand := range operands {
		binary, isBinary := operand.side.(*ast.BinaryExpression)
		if !isBinary || !s.isTautology(binary) {
			continue
		}
		if !s.shouldInclude(SeverityCritical) {
			continue
		}
		result.Findings = append(result.Findings, Finding{
			Severity:    SeverityCritical,
			Pattern:     PatternTautology,
			Description: operand.description,
			Risk:        "Authentication bypass, unauthorized data access",
			Suggestion:  "Review and sanitize input parameters",
		})
	}
}
// checkUnionInjection examines the right-hand side of a set operation for
// patterns typical of UNION-based data extraction. It only acts when that
// side is a SELECT statement, and may append up to two findings to result:
//
//   - a high-severity finding when at least two selected columns are the
//     identifier NULL (matched case-insensitively);
//   - a critical finding when the SELECT's TableName is non-empty and
//     isSystemTable accepts it.
//
// Each finding is subject to the shouldInclude severity filter.
func (s *Scanner) checkUnionInjection(stmt *ast.SetOperation, result *ScanResult) {
	sel, isSelect := stmt.Right.(*ast.SelectStatement)
	if !isSelect {
		return
	}

	// NULL placeholders are commonly used to pad a UNION injection to the
	// column count of the original query.
	nulls := 0
	for _, column := range sel.Columns {
		if id, isIdent := column.(*ast.Identifier); isIdent && strings.EqualFold(id.Name, "NULL") {
			nulls++
		}
	}
	if nulls >= 2 && s.shouldInclude(SeverityHigh) {
		result.Findings = append(result.Findings, Finding{
			Severity:    SeverityHigh,
			Pattern:     PatternUnionBased,
			Description: "UNION SELECT with multiple NULL columns detected",
			Risk:        "Data extraction via UNION-based injection",
			Suggestion:  "Verify UNION is intentional and inputs are sanitized",
		})
	}

	// Reading from system tables through the appended SELECT.
	if sel.TableName != "" && s.isSystemTable(sel.TableName) && s.shouldInclude(SeverityCritical) {
		result.Findings = append(result.Findings, Finding{
			Severity:    SeverityCritical,
			Pattern:     PatternUnionBased,
			Description: "UNION SELECT accessing system tables detected",
			Risk:        "Database schema enumeration, privilege escalation",
			Suggestion:  "Block access to system tables from user queries",
		})
	}
}
// isSystemTable reports whether tableName refers to a system table. The name
// is lowercased and then accepted if it equals an entry of systemTableNames
// or starts with an entry of systemTablePrefixes (e.g.
// "information_schema.tables", "pg_class"). Matching is anchored at the start
// of the name rather than anywhere inside it, which limits false positives.
//
// NOTE(review): the comparison presumes both lists hold lowercase entries —
// confirm where they are declared.
func (s *Scanner) isSystemTable(tableName string) bool {
	lowered := strings.ToLower(tableName)

	// Pass 1: the whole name is a known system table.
	for _, exact := range systemTableNames {
		if exact == lowered {
			return true
		}
	}

	// Pass 2: the name begins with a known system schema/table prefix.
	for _, lead := range systemTablePrefixes {
		if strings.HasPrefix(lowered, lead) {
			return true
		}
	}

	return false
}
// scanFunctionCall checks a function call for dangerous function usage and
// then descends into its arguments.
//
// The function name is matched case-insensitively against two fixed sets:
//
//   - time-delay functions (SLEEP, PG_SLEEP, BENCHMARK, WAITFOR), reported as
//     a high-severity PatternTimeBased finding;
//   - out-of-band / dangerous functions (file access, command execution,
//     network access, dynamic SQL), reported as a critical PatternOutOfBand
//     finding whose Risk names the capability involved.
//
// Findings are appended to result subject to the shouldInclude severity
// filter. A nil fn is ignored.
//
// The name sets are expressed as switch statements rather than map literals
// so that no lookup table is allocated per call; this method is re-entered
// for every nested function call found in the arguments.
func (s *Scanner) scanFunctionCall(fn *ast.FunctionCall, result *ScanResult) {
	if fn == nil {
		return
	}

	// Matching uses the upper-cased name; descriptions keep fn.Name as written.
	funcName := strings.ToUpper(fn.Name)

	// Time-based blind injection functions.
	switch funcName {
	case "SLEEP", "PG_SLEEP", "BENCHMARK", "WAITFOR":
		finding := Finding{
			Severity:    SeverityHigh,
			Pattern:     PatternTimeBased,
			Description: "Time-based blind injection function detected: " + fn.Name,
			Risk:        "Time-based blind SQL injection, DoS",
			Suggestion:  "Block or restrict time delay functions",
		}
		if s.shouldInclude(finding.Severity) {
			result.Findings = append(result.Findings, finding)
		}
	}

	// Out-of-band / dangerous functions. risk stays empty when the name is
	// not in the set; every listed function has a non-empty risk label.
	var risk string
	switch funcName {
	case "LOAD_FILE", "LOAD DATA":
		risk = "File system access"
	case "XP_CMDSHELL":
		risk = "Command execution"
	case "SP_OACREATE":
		risk = "OLE automation"
	case "UTL_HTTP":
		risk = "Network access"
	case "DBMS_LDAP":
		risk = "LDAP access"
	case "EXEC", "SP_EXECUTESQL":
		risk = "Dynamic SQL execution"
	}
	if risk != "" {
		finding := Finding{
			Severity:    SeverityCritical,
			Pattern:     PatternOutOfBand,
			Description: "Dangerous function detected: " + fn.Name,
			Risk:        risk,
			Suggestion:  "Block dangerous functions or use allowlist",
		}
		if s.shouldInclude(finding.Severity) {
			result.Findings = append(result.Findings, finding)
		}
	}

	// Recursively check function arguments for nested dangerous calls.
	for _, arg := range fn.Arguments {
		s.scanExpressionForDangerousFunctions(arg, result)
	}
}
// scanExpressionForDangerousFunctions walks expr looking for function calls
// to hand to scanFunctionCall. Function calls are scanned directly, binary
// expressions are descended left operand first and then right, and unary
// expressions are descended into their single operand. A nil expression and
// every other expression kind are ignored.
func (s *Scanner) scanExpressionForDangerousFunctions(expr ast.Expression, result *ScanResult) {
	switch node := expr.(type) {
	case nil:
		// Nothing to inspect.
		return
	case *ast.FunctionCall:
		s.scanFunctionCall(node, result)
	case *ast.UnaryExpression:
		s.scanExpressionForDangerousFunctions(node.Expr, result)
	case *ast.BinaryExpression:
		s.scanExpressionForDangerousFunctions(node.Left, result)
		s.scanExpressionForDangerousFunctions(node.Right, result)
	}
}
// detectCommentPatterns checks raw SQL for comment-based injection.
func (s *Scanner) detectCommentPatterns(sql string, result *ScanResult) {
// Ensure patterns are initialized
commentPatternsOnce.Do(initCommentPatterns)
for _, p := range commentPatterns {