Skip to content

Commit 90360d9

Browse files
committed
initial inspect string and file samples and tests
1 parent 8d79fd1 commit 90360d9

5 files changed

Lines changed: 191 additions & 27 deletions

File tree

dlp/dlp_inspect_file.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Copyright 2018 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START dlp_inspect_file]
import os

# Import the client library.
import google.cloud.dlp


def inspect_file():
    """Inspect a bundled sample file for sensitive data and print findings.

    Reads ``resources/test.txt`` (resolved relative to this module) as raw
    bytes, sends it to the DLP ``inspect_content`` API asking for phone
    numbers, email addresses and credit card numbers, and prints each
    finding's quote (when present), info type name and likelihood to stdout.
    Prints ``No findings.`` when the response carries no findings.

    The project is read from the ``GOOGLE_CLOUD_PROJECT`` environment
    variable, falling back to the ``'YOUR_PROJECT_ID'`` placeholder.
    """
    # Instantiate a client.
    dlp = google.cloud.dlp.DlpServiceClient()

    inspect_config = {
        # The infoTypes of information to match
        'info_types': [
            {'name': 'PHONE_NUMBER'},
            {'name': 'EMAIL_ADDRESS'},
            {'name': 'CREDIT_CARD_NUMBER'},
        ],
        # The minimum likelihood required before returning a match
        'min_likelihood': 'LIKELIHOOD_UNSPECIFIED',
        # Whether to include the matching string
        'include_quote': True,
        'limits': {
            # The maximum number of findings to report per request
            # (0 = server maximum)
            'max_findings_per_request': 0,
        },
    }

    # Construct the item, containing the file's byte data.
    # Before running this code, replace the filename with your filepath
    filename = os.path.join(
        os.path.dirname(__file__), 'resources', 'test.txt')
    with open(filename, mode='rb') as f:
        item = {'byte_item': {'type': 'TEXT_UTF8', 'data': f.read()}}

    # Convert the project id into a full resource id.
    # Before running this code, replace 'YOUR_PROJECT_ID' with your project ID
    # or set the GOOGLE_CLOUD_PROJECT environment variable to your project ID.
    project_id = os.getenv('GOOGLE_CLOUD_PROJECT') or 'YOUR_PROJECT_ID'
    parent = dlp.project_path(project_id)

    # Call the API.
    response = dlp.inspect_content(parent, inspect_config, item)

    # Print out the results.
    findings = response.result.findings
    if findings:
        for finding in findings:
            # A missing or empty quote is skipped rather than treated as an
            # error; getattr replaces the former try/except-pass.
            quote = getattr(finding, 'quote', None)
            if quote:
                print('Quote: {}'.format(quote))
            print('Info type: {}'.format(finding.info_type.name))
            print('Likelihood: {}'.format(finding.likelihood))
    else:
        print('No findings.')
# [END dlp_inspect_file]
73+
74+
75+
# Run the sample when this file is executed directly as a script.
if __name__ == '__main__':
    inspect_file()

dlp/dlp_inspect_file_test.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright 2018 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def test_inspect_file(capsys):
    """Run the inspect_file sample and check an email finding is printed."""
    # The sample module is imported inside the test body, not at module level.
    from dlp_inspect_file import inspect_file

    inspect_file()

    # readouterr() yields (stdout, stderr); only stdout is checked.
    stdout = capsys.readouterr()[0]
    assert 'Info type: EMAIL_ADDRESS' in stdout

dlp/dlp_inspect_string.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# Copyright 2018 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START dlp_inspect_string]
16+
import os
17+
18+
# Import the client library.
19+
import google.cloud.dlp
20+
21+
22+
def inspect_string():
    """Inspect a hard-coded sample string for sensitive data; print findings.

    Sends a fixed sentence to the DLP ``inspect_content`` API asking for
    phone numbers, email addresses and credit card numbers, then prints each
    finding's quote (when present), info type name and likelihood to stdout.
    Prints ``No findings.`` when the response carries no findings.

    The project is read from the ``GOOGLE_CLOUD_PROJECT`` environment
    variable, falling back to the ``'YOUR_PROJECT_ID'`` placeholder.
    """
    # Instantiate a client.
    client = google.cloud.dlp.DlpServiceClient()

    # The infoTypes of information to match
    wanted_info_types = [
        {'name': 'PHONE_NUMBER'},
        {'name': 'EMAIL_ADDRESS'},
        {'name': 'CREDIT_CARD_NUMBER'},
    ]
    # The maximum number of findings to report per request
    # (0 = server maximum)
    finding_limits = {'max_findings_per_request': 0}

    config = {
        'info_types': wanted_info_types,
        # The minimum likelihood required before returning a match
        'min_likelihood': 'LIKELIHOOD_UNSPECIFIED',
        # Whether to include the matching string
        'include_quote': True,
        'limits': finding_limits,
    }

    # Construct the `item`.
    text = 'My name is Gary Smith and my email is gary@example.com'
    content_item = {'value': text}

    # Convert the project id into a full resource id.
    # Before running this code, replace 'YOUR_PROJECT_ID' with your project ID
    # or set the GOOGLE_CLOUD_PROJECT environment variable to your project ID.
    project = os.getenv('GOOGLE_CLOUD_PROJECT') or 'YOUR_PROJECT_ID'
    parent_path = client.project_path(project)

    # Call the API.
    response = client.inspect_content(parent_path, config, content_item)

    # Print out the results.
    findings = response.result.findings
    if not findings:
        print('No findings.')
        return

    for finding in findings:
        # A missing or empty quote is silently skipped.
        quote = getattr(finding, 'quote', None)
        if quote:
            print('Quote: {}'.format(quote))
        print('Info type: {}'.format(finding.info_type.name))
        print('Likelihood: {}'.format(finding.likelihood))
69+
# [END dlp_inspect_string]
70+
71+
72+
# Run the sample when this file is executed directly as a script.
if __name__ == '__main__':
    inspect_string()

dlp/dlp_inspect_string_test.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright 2018 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def test_inspect_file(capsys):
    """Run the inspect_string sample and check an email finding is printed."""
    # NOTE(review): despite its name, this test exercises
    # dlp_inspect_string.inspect_string, not the file sample.
    from dlp_inspect_string import inspect_string

    inspect_string()

    # readouterr() yields (stdout, stderr); only stdout is checked.
    stdout = capsys.readouterr()[0]
    assert 'Info type: EMAIL_ADDRESS' in stdout

dlp/inspect_content_test.py

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -156,20 +156,6 @@ def bigquery_project():
156156
bigquery_client.delete_dataset(dataset_ref, delete_contents=True)
157157

158158

159-
def test_inspect_string(capsys):
160-
test_string = 'My name is Gary Smith and my email is gary@example.com'
161-
162-
inspect_content.inspect_string(
163-
GCLOUD_PROJECT,
164-
test_string,
165-
['FIRST_NAME', 'EMAIL_ADDRESS'],
166-
include_quote=True)
167-
168-
out, _ = capsys.readouterr()
169-
assert 'Info type: FIRST_NAME' in out
170-
assert 'Info type: EMAIL_ADDRESS' in out
171-
172-
173159
def test_inspect_string_with_custom_info_types(capsys):
174160
test_string = 'My name is Gary Smith and my email is gary@example.com'
175161
dictionaries = ['Gary Smith']
@@ -201,19 +187,6 @@ def test_inspect_string_no_results(capsys):
201187
assert 'No findings' in out
202188

203189

204-
def test_inspect_file(capsys):
205-
test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.txt')
206-
207-
inspect_content.inspect_file(
208-
GCLOUD_PROJECT,
209-
test_filepath,
210-
['FIRST_NAME', 'EMAIL_ADDRESS'],
211-
include_quote=True)
212-
213-
out, _ = capsys.readouterr()
214-
assert 'Info type: EMAIL_ADDRESS' in out
215-
216-
217190
def test_inspect_file_with_custom_info_types(capsys):
218191
test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.txt')
219192
dictionaries = ['gary@somedomain.com']

0 commit comments

Comments
 (0)