Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions language/v2/language_classify_gcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# To install the latest published package dependency, execute the following:
# pip install google-cloud-language

# sample-metadata
# title: Classify Content (GCS)
# description: Classifying Content in text file stored in Cloud Storage

# [START language_classify_gcs]
from google.cloud import language_v2


def sample_classify_text(
    gcs_content_uri: str = "gs://cloud-samples-data/language/classify-entertainment.txt",
) -> None:
    """Classify the content of a text file stored in Cloud Storage.

    Sends the file to the classification endpoint and prints the name and
    confidence of every category found in the response.

    Args:
        gcs_content_uri: Google Cloud Storage URI where the file content is
            located, e.g. gs://[Your Bucket]/[Path to File].
    """
    language_client = language_v2.LanguageServiceClient()

    # Describe the document to classify.
    #   type_: available types are PLAIN_TEXT and HTML.
    #   language_code: optional; if not specified, the language is
    #     automatically detected. For the list of supported languages see
    #     https://cloud.google.com/natural-language/docs/languages
    request = {
        "document": {
            "gcs_content_uri": gcs_content_uri,
            "type_": language_v2.Document.Type.PLAIN_TEXT,
            "language_code": "en",
        }
    }

    classification = language_client.classify_text(request=request)

    # Report each category the API assigned to the document.
    for found_category in classification.categories:
        # Name of the category, taken from the predefined taxonomy:
        # https://cloud.google.com/natural-language/docs/categories
        print(f"Category name: {found_category.name}")
        # Confidence: how certain the classifier is that this category
        # represents the provided text.
        print(f"Confidence: {found_category.confidence}")
# [END language_classify_gcs]
27 changes: 27 additions & 0 deletions language/v2/language_classify_gcs_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import language_classify_gcs


def test_sample_classify_text_gcs(capsys: ...) -> None:
    """Run the GCS classification sample and check the labels it prints."""
    # Indexing os.environ raises KeyError when the variable is not set at all.
    project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
    assert project_id != ""

    language_classify_gcs.sample_classify_text()
    stdout = capsys.readouterr().out

    # Only the fixed label prefixes are checked, not the values after them.
    for expected_label in ("Category name: ", "Confidence: "):
        assert expected_label in stdout
62 changes: 62 additions & 0 deletions language/v2/language_classify_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# To install the latest published package dependency, execute the following:
# pip install google-cloud-language

# sample-metadata
# title: Classify Content
# description: Classifying Content in a String

# [START language_classify_text]
from google.cloud import language_v2


def sample_classify_text(
    text_content: str = "That actor on TV makes movies in Hollywood and also stars in a variety of popular new TV shows.",
) -> None:
    """Classify the content of a string.

    Sends the text to the classification endpoint and prints the name and
    confidence of every category found in the response.

    Args:
        text_content: The text content to analyze.
    """
    language_client = language_v2.LanguageServiceClient()

    # Describe the document to classify.
    #   type_: available types are PLAIN_TEXT and HTML.
    #   language_code: optional; if not specified, the language is
    #     automatically detected. For the list of supported languages see
    #     https://cloud.google.com/natural-language/docs/languages
    request = {
        "document": {
            "content": text_content,
            "type_": language_v2.Document.Type.PLAIN_TEXT,
            "language_code": "en",
        }
    }

    classification = language_client.classify_text(request=request)

    # Report each category the API assigned to the document.
    for found_category in classification.categories:
        # Name of the category, taken from the predefined taxonomy:
        # https://cloud.google.com/natural-language/docs/categories
        print(f"Category name: {found_category.name}")
        # Confidence: how certain the classifier is that this category
        # represents the provided text.
        print(f"Confidence: {found_category.confidence}")
# [END language_classify_text]
27 changes: 27 additions & 0 deletions language/v2/language_classify_text_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import language_classify_text


def test_sample_classify_text(capsys: ...) -> None:
    """Run the string classification sample and check the labels it prints."""
    # Indexing os.environ raises KeyError when the variable is not set at all.
    project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
    assert project_id != ""

    language_classify_text.sample_classify_text()
    stdout = capsys.readouterr().out

    # Only the fixed label prefixes are checked, not the values after them.
    for expected_label in ("Category name: ", "Confidence: "):
        assert expected_label in stdout
91 changes: 91 additions & 0 deletions language/v2/language_entities_gcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# To install the latest published package dependency, execute the following:
# pip install google-cloud-language

# sample-metadata
# title: Analyzing Entities (GCS)
# description: Analyzing Entities in text file stored in Cloud Storage

# [START language_entities_gcs]
from google.cloud import language_v2


def sample_analyze_entities(
    gcs_content_uri: str = "gs://cloud-samples-data/language/entity.txt",
) -> None:
    """Analyze entities in a text file stored in Cloud Storage.

    Prints, for every entity in the response, its name, type, metadata and
    each of its mentions (text, type and probability score), followed by the
    language of the document.

    Args:
        gcs_content_uri: Google Cloud Storage URI where the file content is
            located, e.g. gs://[Your Bucket]/[Path to File].
    """

    client = language_v2.LanguageServiceClient()

    # Available types: PLAIN_TEXT, HTML
    document_type_in_plain_text = language_v2.Document.Type.PLAIN_TEXT

    # Optional. If not specified, the language is automatically detected.
    # For list of supported languages:
    # https://cloud.google.com/natural-language/docs/languages
    language_code = "en"
    document = {
        "gcs_content_uri": gcs_content_uri,
        "type_": document_type_in_plain_text,
        "language_code": language_code,
    }

    # Available values: NONE, UTF8, UTF16, UTF32.
    # See https://cloud.google.com/natural-language/docs/reference/rest/v2/EncodingType.
    encoding_type = language_v2.EncodingType.UTF8

    response = client.analyze_entities(
        request={"document": document, "encoding_type": encoding_type}
    )

    for entity in response.entities:
        print(f"Representative name for the entity: {entity.name}")

        # Get entity type, e.g. PERSON, LOCATION, ADDRESS, NUMBER, et al.
        # See https://cloud.google.com/natural-language/docs/reference/rest/v2/Entity#type.
        print(f"Entity type: {language_v2.Entity.Type(entity.type_).name}")

        # Loop over the metadata associated with entity.
        # Some entity types may have additional metadata, e.g. ADDRESS entities
        # may have metadata for the address street_name, postal_code, et al.
        for metadata_name, metadata_value in entity.metadata.items():
            print(f"{metadata_name}: {metadata_value}")

        # Loop over the mentions of this entity in the input document.
        # The API currently supports proper noun mentions.
        for mention in entity.mentions:
            print(f"Mention text: {mention.text.content}")

            # Get the mention type, e.g. PROPER for proper noun.
            # Resolved into a local first so the label and value live in a
            # single f-string that stays within the line-length limit.
            mention_type = language_v2.EntityMention.Type(mention.type_).name
            print(f"Mention type: {mention_type}")

            # Get the probability score of this mention, in the (0, 1.0] range.
            # It is printed once per mention, not only for the first one.
            print(f"Probability score: {mention.probability}")

    # Get the language of the text, which will be the same as
    # the language specified in the request or, if not specified,
    # the automatically-detected language.
    print(f"Language of the text: {response.language_code}")
# [END language_entities_gcs]
31 changes: 31 additions & 0 deletions language/v2/language_entities_gcs_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import language_entities_gcs


def test_sample_analyze_entities_gcs(capsys: ...) -> None:
    """Run the GCS entity-analysis sample and check the labels it prints."""
    # Indexing os.environ raises KeyError when the variable is not set at all.
    project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
    assert project_id != ""

    language_entities_gcs.sample_analyze_entities()
    stdout = capsys.readouterr().out

    # Only the fixed label prefixes are checked, not the values after them.
    expected_labels = (
        "Representative name for the entity: ",
        "Entity type: ",
        "Mention text: ",
        "Mention type: ",
        "Probability score: ",
        "Language of the text: ",
    )
    for expected_label in expected_labels:
        assert expected_label in stdout
86 changes: 86 additions & 0 deletions language/v2/language_entities_text.py
Comment thread
kornosk marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# To install the latest published package dependency, execute the following:
# pip install google-cloud-language

# sample-metadata
# title: Analyzing Entities
# description: Analyzing Entities in a String

# [START language_entities_text]
from google.cloud import language_v2


def sample_analyze_entities(text_content: str = "California is a state.") -> None:
    """Analyze entities in a string.

    Prints, for every entity in the response, its name, type, metadata and
    each of its mentions (text, type and probability score), followed by the
    language of the document.

    Args:
        text_content: The text content to analyze.
    """
    language_client = language_v2.LanguageServiceClient()

    # Build the full request up front.
    #   document.type_: available types are PLAIN_TEXT and HTML.
    #   document.language_code: optional; if not specified, the language is
    #     automatically detected. For the list of supported languages see
    #     https://cloud.google.com/natural-language/docs/languages
    #   encoding_type: available values are NONE, UTF8, UTF16, UTF32. See
    #     https://cloud.google.com/natural-language/docs/reference/rest/v2/EncodingType.
    request = {
        "document": {
            "content": text_content,
            "type_": language_v2.Document.Type.PLAIN_TEXT,
            "language_code": "en",
        },
        "encoding_type": language_v2.EncodingType.UTF8,
    }

    analysis = language_client.analyze_entities(request=request)

    for found_entity in analysis.entities:
        print(f"Representative name for the entity: {found_entity.name}")

        # Entity type, e.g. PERSON, LOCATION, ADDRESS, NUMBER, et al.
        # See https://cloud.google.com/natural-language/docs/reference/rest/v2/Entity#type.
        entity_type_name = language_v2.Entity.Type(found_entity.type_).name
        print(f"Entity type: {entity_type_name}")

        # Some entity types may carry additional metadata, e.g. ADDRESS
        # entities may have street_name, postal_code, et al.
        for key, value in found_entity.metadata.items():
            print(f"{key}: {value}")

        # Walk the mentions of this entity in the input document.
        # The API currently supports proper noun mentions.
        for found_mention in found_entity.mentions:
            print(f"Mention text: {found_mention.text.content}")

            # Mention type, e.g. PROPER for proper noun.
            mention_type_name = language_v2.EntityMention.Type(
                found_mention.type_
            ).name
            print(f"Mention type: {mention_type_name}")

            # Probability score of this mention, in the (0, 1.0] range.
            print(f"Probability score: {found_mention.probability}")

    # The language of the text is the one specified in the request or,
    # if not specified, the automatically-detected language.
    print(f"Language of the text: {analysis.language_code}")
# [END language_entities_text]
Loading