Skip to content

Commit 3f57776

Browse files
yoshi-automation authored and tseaver committed
Add v1 object tracking support, v1p3b1 speech transcription / logo recognition support (via synth). (googleapis#8221)
1 parent 78c5a95 commit 3f57776

File tree

6 files changed

+2106
-301
lines changed

6 files changed

+2106
-301
lines changed

videointelligence/google/cloud/videointelligence_v1/proto/video_intelligence.proto

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ message VideoContext {
108108

109109
// Config for TEXT_DETECTION.
110110
TextDetectionConfig text_detection_config = 8;
111+
112+
// Config for OBJECT_TRACKING.
113+
ObjectTrackingConfig object_tracking_config = 13;
111114
}
112115

113116
// Config for LABEL_DETECTION.
@@ -126,6 +129,22 @@ message LabelDetectionConfig {
126129
// Supported values: "builtin/stable" (the default if unset) and
127130
// "builtin/latest".
128131
string model = 3;
132+
133+
// The confidence threshold used to filter the labels from
134+
// frame-level detection. If not set, it is set to 0.4 by default. The valid
135+
// range for this threshold is [0.1, 0.9]. Any value set outside of this
136+
// range will be clipped.
137+
// Note: for best results please follow the default threshold. We will update
138+
// the default threshold every time we release a new model.
139+
float frame_confidence_threshold = 4;
140+
141+
// The confidence threshold used to filter the labels from
142+
// video-level and shot-level detections. If not set, it is set to 0.3 by
143+
// default. The valid range for this threshold is [0.1, 0.9]. Any value set
144+
// outside of this range will be clipped.
145+
// Note: for best results please follow the default threshold. We will update
146+
// the default threshold every time we release a new model.
147+
float video_confidence_threshold = 5;
129148
}
130149

131150
// Config for SHOT_CHANGE_DETECTION.
@@ -155,6 +174,14 @@ message FaceDetectionConfig {
155174
bool include_bounding_boxes = 2;
156175
}
157176

177+
// Config for OBJECT_TRACKING.
178+
message ObjectTrackingConfig {
179+
// Model to use for object tracking.
180+
// Supported values: "builtin/stable" (the default if unset) and
181+
// "builtin/latest".
182+
string model = 1;
183+
}
184+
158185
// Config for TEXT_DETECTION.
159186
message TextDetectionConfig {
160187
// Language hint can be specified if the language to be detected is known a
@@ -163,6 +190,11 @@ message TextDetectionConfig {
163190
//
164191
// Automatic language detection is performed if no hint is provided.
165192
repeated string language_hints = 1;
193+
194+
// Model to use for text detection.
195+
// Supported values: "builtin/stable" (the default if unset) and
196+
// "builtin/latest".
197+
string model = 2;
166198
}
167199

168200
// Video segment.

videointelligence/google/cloud/videointelligence_v1/proto/video_intelligence_pb2.py

Lines changed: 230 additions & 75 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

videointelligence/google/cloud/videointelligence_v1p3beta1/gapic/enums.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,20 @@ class Feature(enum.IntEnum):
2828
LABEL_DETECTION (int): Label detection. Detect objects, such as dog or flower.
2929
SHOT_CHANGE_DETECTION (int): Shot change detection.
3030
EXPLICIT_CONTENT_DETECTION (int): Explicit content detection.
31+
SPEECH_TRANSCRIPTION (int): Speech transcription.
3132
TEXT_DETECTION (int): OCR text detection and tracking.
3233
OBJECT_TRACKING (int): Object detection and tracking.
34+
LOGO_RECOGNITION (int): Logo detection, tracking, and recognition.
3335
"""
3436

3537
FEATURE_UNSPECIFIED = 0
3638
LABEL_DETECTION = 1
3739
SHOT_CHANGE_DETECTION = 2
3840
EXPLICIT_CONTENT_DETECTION = 3
41+
SPEECH_TRANSCRIPTION = 6
3942
TEXT_DETECTION = 7
4043
OBJECT_TRACKING = 9
44+
LOGO_RECOGNITION = 12
4145

4246

4347
class LabelDetectionMode(enum.IntEnum):

0 commit comments

Comments
 (0)