forked from deepspeedai/DeepSpeed
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspatial_cuda_layers.h
More file actions
32 lines (25 loc) · 833 Bytes
/
spatial_cuda_layers.h
File metadata and controls
32 lines (25 loc) · 833 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
// Feature gate for half-precision code paths: set only when the current
// compilation pass targets an architecture value of 530 or higher.
// `defined(__CUDA_ARCH__)` makes the "macro absent" case explicit (an undefined
// identifier would silently evaluate to 0 in `#if` and trips -Wundef).
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
// The replacement list is the plain integer 1. The previous `= 1` form made the
// macro expand to the tokens `= 1`, which only worked with `#ifdef` and broke
// `#if HALF_PRECISION_AVAILABLE` or any use of the macro as a value.
#define HALF_PRECISION_AVAILABLE 1
#endif
#include <cooperative_groups.h>
#include <cuda.h>
#include <cuda_fp16.h>
/*********** Group Norm Kernels, Structs, and Helpers ************/
/*
 * Bundle of three signed 64-bit extents describing one problem instance.
 * Member order (batch_size, seq_len, channels) is part of the contract:
 * positional aggregate initialization depends on it.
 * NOTE(review): the name suggests the data is laid out channels-last; confirm
 * against the kernels that consume this struct.
 */
typedef struct {
    int64_t batch_size;  // extent of the batch dimension
    int64_t seq_len;     // extent of the sequence dimension
    int64_t channels;    // extent of the channel dimension
} ChannelsLastProblem;
/*
 * Host-side entry point declared here; the definition lives in another
 * translation unit and is not visible from this header.
 *
 * Parameters:
 *   result      - writable half-precision buffer (the only non-const pointer).
 *   activation  - read-only half-precision input.
 *   bias        - read-only half-precision input.
 *   other       - read-only half-precision input.
 *   other_bias  - read-only half-precision input.
 *   batch_size, seq_len, channels - problem extents, passed as 32-bit int.
 *   stream      - CUDA stream handle supplied by the caller.
 *
 * NOTE(review): judging by the name, this presumably launches a fused bias-add
 * kernel on `stream`, along the lines of
 * result = (activation + bias) [+ (other + other_bias)]. Whether `other` /
 * `other_bias` may be null, and the expected tensor layout, cannot be
 * determined from this declaration; confirm against the definition.
 * NOTE(review): extents are `int` here while ChannelsLastProblem stores them
 * as int64_t; verify callers cannot exceed the 32-bit range.
 */
void launch_opt_bias_add(__half* result,
const __half* activation,
const __half* bias,
const __half* other,
const __half* other_bias,
int batch_size,
int seq_len,
int channels,
cudaStream_t stream);