MaxxleLLC
diff --git a/‎videointelligence/google/cloud/videointelligence_v1/proto/video_intelligence.proto‎
Lines changed: 32 additions & 0 deletions b/‎videointelligence/google/cloud/videointelligence_v1/proto/video_intelligence.proto‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎videointelligence/google/cloud/videointelligence_v1/proto/video_intelligence_pb2.py‎
Lines changed: 230 additions & 75 deletions b/‎videointelligence/google/cloud/videointelligence_v1/proto/video_intelligence_pb2.py‎
Lines changed: 230 additions & 75 deletions
diff --git a/‎videointelligence/google/cloud/videointelligence_v1p3beta1/gapic/enums.py‎
Lines changed: 4 additions & 0 deletions b/‎videointelligence/google/cloud/videointelligence_v1p3beta1/gapic/enums.py‎
Lines changed: 4 additions & 0 deletions
@@ -108,6 +108,9 @@ message VideoContext {
 
   // Config for TEXT_DETECTION.
   TextDetectionConfig text_detection_config = 8;
+
+  // Config for OBJECT_TRACKING.
+  ObjectTrackingConfig object_tracking_config = 13;
 }
 
 // Config for LABEL_DETECTION.
@@ -126,6 +129,22 @@ message LabelDetectionConfig {
   // Supported values: "builtin/stable" (the default if unset) and
   // "builtin/latest".
   string model = 3;
+
+  // The confidence threshold used to filter labels from frame-level
+  // detection. If not set, it defaults to 0.4. The valid range for this
+  // threshold is [0.1, 0.9]. Any value set outside of this range will be
+  // clipped.
+  // Note: for best results, use the default threshold. The default
+  // threshold will be updated every time a new model is released.
+  float frame_confidence_threshold = 4;
+
+  // The confidence threshold used to filter labels from video-level and
+  // shot-level detections. If not set, it defaults to 0.3. The valid range
+  // for this threshold is [0.1, 0.9]. Any value set outside of this range
+  // will be clipped.
+  // Note: for best results, use the default threshold. The default
+  // threshold will be updated every time a new model is released.
+  float video_confidence_threshold = 5;
 }
 
 // Config for SHOT_CHANGE_DETECTION.
@@ -155,6 +174,14 @@ message FaceDetectionConfig {
   bool include_bounding_boxes = 2;
 }
 
+// Config for OBJECT_TRACKING (set via VideoContext.object_tracking_config).
+message ObjectTrackingConfig {
+  // Name of the model to use for object tracking.
+  // Supported values: "builtin/stable" (the default if unset) and
+  // "builtin/latest".
+  string model = 1;
+}
+
 // Config for TEXT_DETECTION.
 message TextDetectionConfig {
   // Language hint can be specified if the language to be detected is known a
@@ -163,6 +190,11 @@ message TextDetectionConfig {
   //
   // Automatic language detection is performed if no hint is provided.
   repeated string language_hints = 1;
+
+  // Model to use for text detection.
+  // Supported values: "builtin/stable" (the default if unset) and
+  // "builtin/latest".
+  string model = 2;
 }
 
 // Video segment.
 
@@ -28,16 +28,20 @@ class Feature(enum.IntEnum):
       LABEL_DETECTION (int): Label detection. Detect objects, such as dog or flower.
       SHOT_CHANGE_DETECTION (int): Shot change detection.
       EXPLICIT_CONTENT_DETECTION (int): Explicit content detection.
+      SPEECH_TRANSCRIPTION (int): Speech transcription.
       TEXT_DETECTION (int): OCR text detection and tracking.
       OBJECT_TRACKING (int): Object detection and tracking.
+      LOGO_RECOGNITION (int): Logo detection, tracking, and recognition.
     """
 
     FEATURE_UNSPECIFIED = 0
     LABEL_DETECTION = 1
     SHOT_CHANGE_DETECTION = 2
     EXPLICIT_CONTENT_DETECTION = 3
+    SPEECH_TRANSCRIPTION = 6
     TEXT_DETECTION = 7
     OBJECT_TRACKING = 9
+    LOGO_RECOGNITION = 12
 
 
 class LabelDetectionMode(enum.IntEnum):