-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Expand file tree
/
Copy pathHashlibModule.qll
More file actions
237 lines (204 loc) · 8.94 KB
/
HashlibModule.qll
File metadata and controls
237 lines (204 loc) · 8.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import python
import semmle.python.ApiGraphs
import experimental.cryptography.CryptoArtifact
private import experimental.cryptography.utils.Utils as Utils
private import experimental.cryptography.CryptoAlgorithmNames
/**
 * `hashlib` is a Python standard library module for hashing algorithms.
 * https://docs.python.org/3/library/hashlib.html
 * The modules below group the classes and predicates that reference hashlib artifacts.
 */
// -----------------------------------------------
// Hash Artifacts
// -----------------------------------------------
module Hashes {
/**
 * A hash algorithm selected through `hashlib.new`, whose first argument
 * (`name`) names the algorithm as a string.
 */
class HashlibNewHashAlgorithm extends HashAlgorithm {
  HashlibNewHashAlgorithm() {
    exists(API::CallNode newCall |
      newCall = API::moduleImport("hashlib").getMember("new").getACall() and
      this = Utils::getUltimateSrcFromApiNode(newCall.getParameter(0, "name"))
    )
  }

  override string getName() {
    // A string literal source names the algorithm directly; any other source is
    // assumed to come from outside configuration and is reported as UNKNOWN.
    if this.asExpr() instanceof StringLiteral
    then result = super.normalizeName(this.asExpr().(StringLiteral).getText())
    else result = unknownAlgorithm()
  }
}
/**
 * The hash algorithm passed as `hash_name` (first argument) to a
 * `hashlib.pbkdf2_hmac` call, i.e. the hash used inside the HMAC.
 * The key derivation function itself is modelled separately by
 * `HashlibPbkdf2HMACOperation`.
 *
 * https://docs.python.org/3/library/hashlib.html#hashlib.pbkdf2_hmac
 */
class HashlibPbkdf2HMACHashAlgorithm extends HashAlgorithm {
  HashlibPbkdf2HMACHashAlgorithm() {
    exists(API::CallNode kdfCall |
      kdfCall = API::moduleImport("hashlib").getMember("pbkdf2_hmac").getACall() and
      this = Utils::getUltimateSrcFromApiNode(kdfCall.getParameter(0, "hash_name"))
    )
  }

  override string getName() {
    // Static string source: normalize the literal's text.
    exists(StringLiteral literal | literal = this.asExpr() |
      result = super.normalizeName(literal.getText())
    )
    or
    // Anything else is assumed to come from an outside source, so the algorithm is UNKNOWN.
    not this.asExpr() instanceof StringLiteral and
    result = unknownAlgorithm()
  }
}
/**
 * The source of the `digest` argument (the second argument) of a call to
 * `hashlib.file_digest`, when that source is not already modelled elsewhere.
 *
 * https://docs.python.org/3/library/hashlib.html#hashlib.file_digest
 *
 * NOTE: the digest argument can be, in addition to a string,
 * a callable that returns a hash object or a hash constructor.
 * These cases are not considered here since they would be detected separately.
 * Specifically, other non-string cases are themselves considered sources for alerts, e.g.,
 * references to hashlib.sha512 are found by `HashlibMemberAlgorithm`.
 * The only exception is if the source is not a string constant or HashlibMemberAlgorithm.
 * In these cases, the algorithm is considered 'UNKNOWN'.
 */
class HashlibFileDigestAlgorithm extends HashAlgorithm {
  HashlibFileDigestAlgorithm() {
    this =
      Utils::getUltimateSrcFromApiNode(API::moduleImport("hashlib")
            .getMember("file_digest")
            .getACall()
            .getParameter(1, "digest")) and
    // The exclusions below are written as `not this = ...` on purpose.
    // `this != expr` on a multi-valued `expr` holds as soon as SOME value of `expr`
    // differs from `this`, so it excludes nothing when several values exist and
    // fails outright (emptying this class) when `expr` has no values at all.
    //
    // Ignore sources that are hash constructors, allow `HashlibMemberAlgorithm` to detect these
    not this = hashlibMemberHashAlgorithm(_) and
    // Ignore sources that are HMAC objects, to be handled by HmacModule
    not this = API::moduleImport("hmac").getMember(["new", "HMAC"]).getACall()
  }

  override string getName() {
    // Name is a string constant or consider the name unknown
    // NOTE: we are excluding hmac.new and hmac.HMAC constructor calls so we are expecting
    // a string or an outside configuration only
    result = super.normalizeName(this.asExpr().(StringLiteral).getText())
    or
    not this.asExpr() instanceof StringLiteral and
    result = unknownAlgorithm()
  }
}
/**
 * Gets a member access of hashlib that is an algorithm invocation.
 * `hashName` is the name of the hash algorithm, i.e. the accessed member name.
 *
 * Members of `hashlib` that are known not to be hash constructors are excluded,
 * as are names starting with an underscore.
 *
 * Note: a variant of this logic originally lived in codeql/github/main, where it
 * was factored out into a predicate to avoid a bad join order.
 */
// Copying use of nomagic from similar predicate in codeql/main
pragma[nomagic]
DataFlow::Node hashlibMemberHashAlgorithm(string hashName) {
  result = API::moduleImport("hashlib").getMember(hashName).asSource() and
  // Don't match known non-hash members. `pbkdf2_hmac` and `scrypt` are key
  // derivation functions and have their own models in the `KDF` module.
  not hashName in [
      "new", "pbkdf2_hmac", "scrypt", "algorithms_available", "algorithms_guaranteed",
      "file_digest"
    ] and
  // Don't match things like __file__
  not hashName.regexpMatch("_.*")
}
/**
 * A hashing algorithm referenced as a member (i.e., function) of the
 * `hashlib` module, e.g., `hashlib.sha512`.
 */
class HashlibMemberAlgorithm extends HashAlgorithm {
  HashlibMemberAlgorithm() { exists(string member | this = hashlibMemberHashAlgorithm(member)) }

  override string getName() {
    // The algorithm name is the normalized name of the accessed hashlib member.
    result = super.normalizeName(any(string member | this = hashlibMemberHashAlgorithm(member)))
  }
}
}
// -----------------------------------------------
// Key Derivation Functions
// -----------------------------------------------
module KDF {
// NOTE: This predicate binds only the non-optional params of `pbkdf2_hmac`.
// `dklen` is optional (it can be None); if it were bound here as well, the
// predicate would be unsatisfiable for calls that do not supply it.
predicate hashlibPBDKF2HMACKDFRequiredParams(
  HashlibPbkdf2HMACOperation kdf, API::Node hashParam, API::Node saltParam,
  API::Node iterationParam
) {
  hashParam = kdf.getParameter(0, "hash_name") and
  saltParam = kdf.getParameter(2, "salt") and
  iterationParam = kdf.getParameter(3, "iterations")
}
// Binds the optional `dklen` param of `pbkdf2_hmac` (positional index 4 or keyword `dklen`).
predicate hashlibPBDKF2HMACKDFOptionalParams(HashlibPbkdf2HMACOperation kdf, API::Node keylenParam) {
  keylenParam = kdf.getParameter(4, "dklen")
}
/**
 * A call to the key derivation function `hashlib.pbkdf2_hmac`.
 * The call is modelled as both the algorithm and the operation.
 * https://docs.python.org/3/library/hashlib.html#hashlib.pbkdf2_hmac
 */
class HashlibPbkdf2HMACOperation extends KeyDerivationAlgorithm, KeyDerivationOperation {
  HashlibPbkdf2HMACOperation() {
    this = API::moduleImport("hashlib").getMember("pbkdf2_hmac").getACall()
  }

  override string getName() { result = super.normalizeName("pbkdf2_hmac") }

  // Source of the `iterations` argument.
  override DataFlow::Node getIterationSizeSrc() {
    result =
      Utils::getUltimateSrcFromApiNode(any(API::Node iterations |
          hashlibPBDKF2HMACKDFRequiredParams(this, _, _, iterations)
        ))
  }

  // Source of the `salt` argument.
  override DataFlow::Node getSaltConfigSrc() {
    result =
      Utils::getUltimateSrcFromApiNode(any(API::Node salt |
          hashlibPBDKF2HMACKDFRequiredParams(this, _, salt, _)
        ))
  }

  // Source of the `hash_name` argument.
  override DataFlow::Node getHashConfigSrc() {
    result =
      Utils::getUltimateSrcFromApiNode(any(API::Node hashName |
          hashlibPBDKF2HMACKDFRequiredParams(this, hashName, _, _)
        ))
  }

  // Source of the optional `dklen` argument; has no result when the call omits it.
  override DataFlow::Node getDerivedKeySizeSrc() {
    result =
      Utils::getUltimateSrcFromApiNode(any(API::Node derivedKeyLen |
          hashlibPBDKF2HMACKDFOptionalParams(this, derivedKeyLen)
        ))
  }

  // TODO: if DK is none, then the length is based on the hash type, if hash length not known, must call this unknown
  // The issue is the src is what we model not the size
  // For now, we are not modeling this and are relying on the fact that the accepted hashes are of accepted length.
  // I.e., any query looking at length will ignore cases where it is unknown
  override KeyDerivationAlgorithm getAlgorithm() { result = this }

  override predicate requiresHash() { any() }

  override predicate requiresMode() { none() }

  override predicate requiresSalt() { any() }

  override predicate requiresIteration() { any() }
}
// TODO: better modeling of scrypt (the cost parameters `n`, `r` and `p` are not modelled)
/**
 * A call to the key derivation function `hashlib.scrypt`.
 * The call is modelled as both the algorithm and the operation.
 *
 * The signature is `hashlib.scrypt(password, *, salt, n, r, p, maxmem=0, dklen=64)`:
 * everything after `password` is keyword-only, so those arguments have no positional
 * index and are looked up by keyword name.
 *
 * https://docs.python.org/3/library/hashlib.html#hashlib.scrypt
 */
class HashlibScryptAlgorithm extends KeyDerivationAlgorithm, KeyDerivationOperation {
  HashlibScryptAlgorithm() { this = API::moduleImport("hashlib").getMember("scrypt").getACall() }

  override string getName() { result = super.normalizeName("scrypt") }

  override DataFlow::Node getIterationSizeSrc() { none() }

  override DataFlow::Node getSaltConfigSrc() {
    // `salt` is keyword-only, so it is resolved by name rather than by position.
    // NOTE(review): relies on this class inheriting `API::CallNode` members, as the
    // `getParameter` calls on the sibling `pbkdf2_hmac` model imply - confirm.
    result = Utils::getUltimateSrcFromApiNode(this.getKeywordParameter("salt"))
  }

  override DataFlow::Node getHashConfigSrc() { none() }

  override DataFlow::Node getDerivedKeySizeSrc() {
    // `dklen` is keyword-only and optional; there is no result when the call omits it.
    result = Utils::getUltimateSrcFromApiNode(this.getKeywordParameter("dklen"))
  }

  override KeyDerivationAlgorithm getAlgorithm() { result = this }

  override predicate requiresHash() { none() }

  override predicate requiresMode() { none() }

  override predicate requiresSalt() { any() }

  override predicate requiresIteration() { none() }
}
}