Skip to content

Commit 044a15c

Browse files
Google APIs copybara-github
authored and committed
feat: Add support for V1 and V2 classification models for the V1Beta2 API
PiperOrigin-RevId: 475604619
1 parent bd28cfb commit 044a15c

3 files changed

Lines changed: 133 additions & 41 deletions

File tree

google/cloud/language/v1beta2/BUILD.bazel

Lines changed: 24 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -27,7 +27,6 @@ proto_library(
2727
"//google/api:annotations_proto",
2828
"//google/api:client_proto",
2929
"//google/api:field_behavior_proto",
30-
"@com_google_protobuf//:timestamp_proto",
3130
],
3231
)
3332

@@ -65,13 +64,16 @@ java_grpc_library(
6564
java_gapic_library(
6665
name = "language_java_gapic",
6766
srcs = [":language_proto_with_info"],
67+
gapic_yaml = None,
6868
grpc_service_config = "language_grpc_service_config.json",
69+
service_yaml = "language_v1beta2.yaml",
6970
test_deps = [
7071
":language_java_grpc",
7172
],
7273
transport = "grpc+rest",
7374
deps = [
7475
":language_java_proto",
76+
"//google/api:api_java_proto",
7577
],
7678
)
7779

@@ -164,6 +166,9 @@ py_gapic_library(
164166
srcs = [":language_proto"],
165167
grpc_service_config = "language_grpc_service_config.json",
166168
transport = "grpc",
169+
service_yaml = "language_v1beta2.yaml",
170+
deps = [
171+
],
167172
)
168173

169174
py_test(
@@ -290,6 +295,7 @@ ruby_cloud_gapic_library(
290295
grpc_service_config = "language_grpc_service_config.json",
291296
ruby_cloud_description = "Provides natural language understanding technologies, such as sentiment analysis, entity recognition, entity sentiment analysis, and other text annotations.",
292297
ruby_cloud_title = "Natural Language V1beta2",
298+
service_yaml = "language_v1beta2.yaml",
293299
deps = [
294300
":language_ruby_grpc",
295301
":language_ruby_proto",
@@ -353,4 +359,20 @@ csharp_gapic_assembly_pkg(
353359
##############################################################################
354360
# C++
355361
##############################################################################
356-
# Put your C++ rules here
362+
load(
363+
"@com_google_googleapis_imports//:imports.bzl",
364+
"cc_grpc_library",
365+
"cc_proto_library",
366+
)
367+
368+
cc_proto_library(
369+
name = "language_cc_proto",
370+
deps = [":language_proto"],
371+
)
372+
373+
cc_grpc_library(
374+
name = "language_cc_grpc",
375+
srcs = [":language_proto"],
376+
grpc_only = True,
377+
deps = [":language_cc_proto"],
378+
)

google/cloud/language/v1beta2/language_service.proto

Lines changed: 99 additions & 31 deletions
Original file line number · Diff line number · Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2019 Google LLC.
1+
// Copyright 2022 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -11,7 +11,6 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14-
//
1514

1615
syntax = "proto3";
1716

@@ -68,7 +67,7 @@ service LanguageService {
6867
}
6968

7069
// Analyzes the syntax of the text and provides sentence boundaries and
71-
// tokenization along with part-of-speech tags, dependency trees, and other
70+
// tokenization along with part of speech tags, dependency trees, and other
7271
// properties.
7372
rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
7473
option (google.api.http) = {
@@ -100,7 +99,7 @@ service LanguageService {
10099
}
101100
}
102101

103-
102+
// ################################################################ #
104103
//
105104
// Represents the input to API methods.
106105
message Document {
@@ -116,6 +115,19 @@ message Document {
116115
HTML = 2;
117116
}
118117

118+
// Ways of handling boilerplate detected in the document
119+
enum BoilerplateHandling {
120+
// The boilerplate handling is not specified.
121+
BOILERPLATE_HANDLING_UNSPECIFIED = 0;
122+
123+
// Do not analyze detected boilerplate. Reference web URI is required for
124+
// detecting boilerplate.
125+
SKIP_BOILERPLATE = 1;
126+
127+
// Treat boilerplate the same as content.
128+
KEEP_BOILERPLATE = 2;
129+
}
130+
119131
// Required. If the type is not set or is `TYPE_UNSPECIFIED`,
120132
// returns an `INVALID_ARGUMENT` error.
121133
Type type = 1;
@@ -143,6 +155,15 @@ message Document {
143155
// specified by the caller or automatically detected) is not supported by the
144156
// called API method, an `INVALID_ARGUMENT` error is returned.
145157
string language = 4;
158+
159+
// The web URI where the document comes from. This URI is not used for
160+
// fetching the content, but as a hint for analyzing the document.
161+
string reference_web_uri = 5;
162+
163+
// Indicates how detected boilerplate(e.g. advertisements, copyright
164+
// declarations, banners) should be handled for this document. If not
165+
// specified, boilerplate will be treated the same as content.
166+
BoilerplateHandling boilerplate_handling = 6;
146167
}
147168

148169
// Represents a sentence in the input document.
@@ -156,6 +177,32 @@ message Sentence {
156177
Sentiment sentiment = 2;
157178
}
158179

180+
// Represents the text encoding that the caller uses to process the output.
181+
// Providing an `EncodingType` is recommended because the API provides the
182+
// beginning offsets for various outputs, such as tokens and mentions, and
183+
// languages that natively use different text encodings may access offsets
184+
// differently.
185+
enum EncodingType {
186+
// If `EncodingType` is not specified, encoding-dependent information (such as
187+
// `begin_offset`) will be set at `-1`.
188+
NONE = 0;
189+
190+
// Encoding-dependent information (such as `begin_offset`) is calculated based
191+
// on the UTF-8 encoding of the input. C++ and Go are examples of languages
192+
// that use this encoding natively.
193+
UTF8 = 1;
194+
195+
// Encoding-dependent information (such as `begin_offset`) is calculated based
196+
// on the UTF-16 encoding of the input. Java and JavaScript are examples of
197+
// languages that use this encoding natively.
198+
UTF16 = 2;
199+
200+
// Encoding-dependent information (such as `begin_offset`) is calculated based
201+
// on the UTF-32 encoding of the input. Python is an example of a language
202+
// that uses this encoding natively.
203+
UTF32 = 3;
204+
}
205+
159206
// Represents a phrase in the text that is a known entity, such as
160207
// a person, an organization, or location. The API associates information, such
161208
// as salience and mentions, with entities.
@@ -286,32 +333,6 @@ message Token {
286333
string lemma = 4;
287334
}
288335

289-
// Represents the text encoding that the caller uses to process the output.
290-
// Providing an `EncodingType` is recommended because the API provides the
291-
// beginning offsets for various outputs, such as tokens and mentions, and
292-
// languages that natively use different text encodings may access offsets
293-
// differently.
294-
enum EncodingType {
295-
// If `EncodingType` is not specified, encoding-dependent information (such as
296-
// `begin_offset`) will be set at `-1`.
297-
NONE = 0;
298-
299-
// Encoding-dependent information (such as `begin_offset`) is calculated based
300-
// on the UTF-8 encoding of the input. C++ and Go are examples of languages
301-
// that use this encoding natively.
302-
UTF8 = 1;
303-
304-
// Encoding-dependent information (such as `begin_offset`) is calculated based
305-
// on the UTF-16 encoding of the input. Java and JavaScript are examples of
306-
// languages that use this encoding natively.
307-
UTF16 = 2;
308-
309-
// Encoding-dependent information (such as `begin_offset`) is calculated based
310-
// on the UTF-32 encoding of the input. Python is an example of a language
311-
// that uses this encoding natively.
312-
UTF32 = 3;
313-
}
314-
315336
// Represents the feeling associated with the entire text or entities in
316337
// the text.
317338
// Next ID: 6
@@ -968,6 +989,45 @@ message ClassificationCategory {
968989
float confidence = 2;
969990
}
970991

992+
// Model options available for classification requests.
993+
message ClassificationModelOptions {
994+
// Options for the V1 model.
995+
message V1Model {
996+
997+
}
998+
999+
// Options for the V2 model.
1000+
message V2Model {
1001+
// The content categories used for classification.
1002+
enum ContentCategoriesVersion {
1003+
// If `ContentCategoriesVersion` is not specified, this option will
1004+
// default to `V1`.
1005+
CONTENT_CATEGORIES_VERSION_UNSPECIFIED = 0;
1006+
1007+
// Legacy content categories of our initial launch in 2017.
1008+
V1 = 1;
1009+
1010+
// Updated content categories in 2022.
1011+
V2 = 2;
1012+
}
1013+
1014+
// The content categories used for classification.
1015+
ContentCategoriesVersion content_categories_version = 1;
1016+
}
1017+
1018+
// If this field is not set, then the `v1_model` will be used by default.
1019+
oneof model_type {
1020+
// Setting this field will use the V1 model and V1 content categories
1021+
// version. The V1 model is a legacy model; support for this will be
1022+
// discontinued in the future.
1023+
V1Model v1_model = 1;
1024+
1025+
// Setting this field will use the V2 model with the appropriate content
1026+
// categories version. The V2 model is a better performing model.
1027+
V2Model v2_model = 2;
1028+
}
1029+
}
1030+
9711031
// The sentiment analysis request message.
9721032
message AnalyzeSentimentRequest {
9731033
// Required. Input document.
@@ -1059,6 +1119,10 @@ message AnalyzeSyntaxResponse {
10591119
message ClassifyTextRequest {
10601120
// Required. Input document.
10611121
Document document = 1 [(google.api.field_behavior) = REQUIRED];
1122+
1123+
// Model options to use for classification. Defaults to v1 options if not
1124+
// specified.
1125+
ClassificationModelOptions classification_model_options = 3;
10621126
}
10631127

10641128
// The document classification response message.
@@ -1072,7 +1136,7 @@ message ClassifyTextResponse {
10721136
message AnnotateTextRequest {
10731137
// All available features for sentiment, syntax, and semantic analysis.
10741138
// Setting each one to true will enable that specific analysis for the input.
1075-
// Next ID: 10
1139+
// Next ID: 11
10761140
message Features {
10771141
// Extract syntax information.
10781142
bool extract_syntax = 1;
@@ -1091,6 +1155,10 @@ message AnnotateTextRequest {
10911155
// [predefined
10921156
// taxonomy](https://cloud.google.com/natural-language/docs/categories).
10931157
bool classify_text = 6;
1158+
1159+
// The model options to use for classification. Defaults to v1 options
1160+
// if not specified. Only used if `classify_text` is set to true.
1161+
ClassificationModelOptions classification_model_options = 10;
10941162
}
10951163

10961164
// Required. Input document.
Lines changed: 10 additions & 8 deletions
Original file line number · Diff line number · Diff line change
@@ -1,19 +1,21 @@
11
type: google.api.Service
2-
config_version: 2
2+
config_version: 3
33
name: language.googleapis.com
4-
title: Google Cloud Natural Language API
4+
title: Cloud Natural Language API
55

66
apis:
77
- name: google.cloud.language.v1beta2.LanguageService
88

99
documentation:
10-
summary:
11-
'Google Cloud Natural Language API provides natural language understanding
12-
technologies to developers. Examples include sentiment analysis, entity
13-
recognition, and text annotations.'
10+
summary: |-
11+
Provides natural language understanding technologies, such as sentiment
12+
analysis, entity recognition, entity sentiment analysis, and other text
13+
annotations, to developers.
1414
1515
authentication:
1616
rules:
17-
- selector: '*'
17+
- selector: 'google.cloud.language.v1beta2.LanguageService.*'
1818
oauth:
19-
canonical_scopes: https://www.googleapis.com/auth/cloud-platform
19+
canonical_scopes: |-
20+
https://www.googleapis.com/auth/cloud-language,
21+
https://www.googleapis.com/auth/cloud-platform

0 commit comments

Comments (0)