Skip to content

Commit 3c21d89

Browse files
authored
Merge pull request hardkernel#44 from yan-wyb/khadas-vims-4.9.y
update npu driver to 6.4.4.3
2 parents 814f263 + 8ce7dc7 commit 3c21d89

72 files changed

Lines changed: 6788 additions & 1005 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

drivers/amlogic/npu/inc/drvi/gc_vsc_drvi_interface.h

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,10 @@
6868
/* 0.0.1.45 Add a new enumeration for VIR_SymFlagExt 05/15/2020 */
6969
/* 0.0.1.46 Add a minimum workGroupSize in VIR_ComputeLayout 05/19/2020 */
7070
/* 0.0.1.47 Save the HW specific attributes in VIR_Shader 05/26/2020 */
71-
#define gcdVIR_SHADER_BINARY_FILE_VERSION gcmCC(SHADER_64BITMODE, 0, 1, 47)
72-
#define gcdVIR_PROGRAM_BINARY_FILE_VERSION gcmCC(SHADER_64BITMODE, 0, 1, 47)
71+
/* 0.0.1.48 Add a new variable to save the symbol ID of the register spill base address in VIR_Shader 07/13/2020 */
72+
/* 0.0.1.49 Save the RA instruction ID in VIR_Instruction 08/11/2020 */
73+
#define gcdVIR_SHADER_BINARY_FILE_VERSION gcmCC(SHADER_64BITMODE, 0, 1, 49)
74+
#define gcdVIR_PROGRAM_BINARY_FILE_VERSION gcmCC(SHADER_64BITMODE, 0, 1, 49)
7375

7476
#if !defined(gcdTARGETHOST_BIGENDIAN)
7577
#define gcdTARGETHOST_BIGENDIAN 0 /* default host little endian, to change the
@@ -523,9 +525,16 @@ typedef union _VSC_Image_desc {
523525
/* the first four 32-bit words are the same as the HW image descriptor as of V630 */
524526
gctUINT baseAddress; /* base address of image data */
525527
gctUINT row_stride; /* the row stride (byte) of the image */
528+
529+
#if !gcdENDIAN_BIG
526530
gctUINT width : 16; /* the width of image (pixels) */
527531
gctUINT height : 16; /* the height of image (rows) */
532+
#else
533+
gctUINT height : 16; /* the height of image (rows) */
534+
gctUINT width : 16; /* the width of image (pixels) */
535+
#endif
528536

537+
#if !gcdENDIAN_BIG
529538
gctUINT shift : 3; /* Shift value for index. */
530539
gctUINT multiply : 1; /* Value to multiply index with. */
531540
gctUINT addressing : 2; /* Addressing mode for LOAD_IMG and STORE_IMG. */
@@ -542,16 +551,52 @@ typedef union _VSC_Image_desc {
542551
gctUINT reserved0 : 1;
543552
gctUINT swizzleA : 3; /* swizzle for alpha */
544553
gctUINT reserved1 : 1;
554+
#else
555+
gctUINT reserved1 : 1;
556+
gctUINT swizzleA : 3; /* swizzle for alpha */
557+
gctUINT reserved0 : 1;
558+
gctUINT swizzleB : 3; /* swizzle for blue */
559+
gctUINT imageId2 : 1; /* ImageID bit2. */
560+
gctUINT swizzleG : 3; /* swizzle for green */
561+
gctUINT imageId1 : 1; /* ImageID bit1. */
562+
gctUINT swizzleR : 3; /* swizzle for red */
563+
gctUINT componentCount : 2; /* Component count. */
564+
gctUINT imageId0 : 1; /* ImageID bit0. */
565+
gctUINT image1Dor2D : 1; /* 1D or 2D image */
566+
gctUINT titling : 2; /* titling */
567+
gctUINT conversion : 4; /* Conversion format. */
568+
gctUINT addressing : 2; /* Addressing mode for LOAD_IMG and STORE_IMG. */
569+
gctUINT multiply : 1; /* Value to multiply index with. */
570+
gctUINT shift : 3; /* Shift value for index. */
571+
#endif
545572

546573
/* following data are used by SW to calculate 3D image slice image address
547574
* and image query data */
548575
gctUINT sliceSize; /* slice size for image 3D */
576+
577+
#if !gcdENDIAN_BIG
549578
gctUINT depth_arraySize : 16; /* depth for image 3D, or array_size for image1D/2D array */
550579
gctUINT imageType : 16; /* vscImageValueType: 1D: 0, 1D_buffer: 1, 1D_array: 2, 2D: 3, 2D_array: 4, 3D: 5 */
580+
#else
581+
gctUINT imageType : 16; /* vscImageValueType: 1D: 0, 1D_buffer: 1, 1D_array: 2, 2D: 3, 2D_array: 4, 3D: 5 */
582+
gctUINT depth_arraySize : 16; /* depth for image 3D, or array_size for image1D/2D array */
583+
#endif
584+
585+
#if !gcdENDIAN_BIG
551586
gctUINT channelOrder : 16; /* image channel order */
552587
gctUINT channelDataType : 16; /* image channel data type */
588+
#else
589+
gctUINT channelDataType : 16; /* image channel data type */
590+
gctUINT channelOrder : 16; /* image channel order */
591+
#endif
592+
593+
#if !gcdENDIAN_BIG
553594
gctUINT imageValueType : 2; /* vscImageValueType (float/int/uint), filled by compiler */
554595
gctUINT reserved2 : 30;
596+
#else
597+
gctUINT reserved2 : 30;
598+
gctUINT imageValueType : 2; /* vscImageValueType (float/int/uint), filled by compiler */
599+
#endif
555600
} sd; /* structured data */
556601
gctUINT rawbits[8];
557602
} VSC_ImageDesc;
@@ -748,7 +793,8 @@ typedef struct _VSC_HW_CONFIG
748793
gctUINT hasAtomTimingFix : 1;
749794
gctUINT hasUSCAtomicFix2 : 1;
750795
gctUINT hasFloatingMadFix : 1;
751-
gctUINT reserved1 : 27;
796+
gctUINT hasA0WriteEnableFix : 1;
797+
gctUINT reserved1 : 26;
752798

753799
/* Last word */
754800
/* The following will be removed after shader programming is moved out of VSC */
@@ -779,7 +825,9 @@ typedef struct _VSC_HW_CONFIG
779825
gctUINT maxVaryingCount;
780826
gctUINT maxAttributeCount;
781827
gctUINT maxRenderTargetCount;
828+
gctUINT maxShaderCountPerCore;
782829
gctUINT maxGPRCountPerCore;
830+
gctUINT maxGPRCountPerShader;
783831
gctUINT maxGPRCountPerThread;
784832
gctUINT maxHwNativeTotalInstCount;
785833
gctUINT maxTotalInstCount;
@@ -858,8 +906,10 @@ typedef gcsGLSLCaps VSC_GL_API_CONFIG, *PVSC_GL_API_CONFIG;
858906
#define VSC_COMPILER_OPT_DUAL16 0x0000000000010000ULL
859907
#define VSC_COMPILER_OPT_ILF_LINK 0x0000000000020000ULL
860908
#define VSC_COMPILER_OPT_LOOP 0x0000000000040000ULL
909+
#define VSC_COMPILER_OPT_SCPP 0x0000000000080000ULL
910+
#define VSC_COMPILER_OPT_CPF 0x0000000000100000ULL
861911

862-
#define VSC_COMPILER_OPT_FULL 0x000000000007FFFFULL
912+
#define VSC_COMPILER_OPT_FULL 0x00000000001FFFFFULL
863913

864914
#define VSC_COMPILER_OPT_NO_ALGE_SIMP 0x0000000100000000ULL
865915
#define VSC_COMPILER_OPT_NO_GCP 0x0000000200000000ULL
@@ -880,8 +930,10 @@ typedef gcsGLSLCaps VSC_GL_API_CONFIG, *PVSC_GL_API_CONFIG;
880930
#define VSC_COMPILER_OPT_NO_DUAL16 0x0001000000000000ULL
881931
#define VSC_COMPILER_OPT_NO_ILF_LINK 0x0002000000000000ULL
882932
#define VSC_COMPILER_OPT_NO_LOOP 0x0004000000000000ULL
933+
#define VSC_COMPILER_OPT_NO_SCPP 0x0008000000000000ULL
934+
#define VSC_COMPILER_OPT_NO_CPF 0x0010000000000000ULL
883935

884-
#define VSC_COMPILER_OPT_NO_OPT 0x0007FFFF00000000ULL
936+
#define VSC_COMPILER_OPT_NO_OPT 0x001FFFFF00000000ULL
885937

886938
/* Compiler flag for special purpose */
887939
#define VSC_COMPILER_FLAG_COMPILE_TO_HL 0x00000001 /* Compile IR to HL, including doing all opts in HL */
@@ -1022,13 +1074,23 @@ typedef struct _VSC_SHADER_RESOURCE_LAYOUT
10221074
VSC_SHADER_PUSH_CONSTANT_RANGE* pPushConstantRanges;
10231075
}VSC_SHADER_RESOURCE_LAYOUT;
10241076

1077+
/* Some special HW settings which are maintained by driver's adapter/device. */
1078+
typedef struct _VSC_SPECIFIC_HW_SETTING
1079+
{
1080+
/* How many clusters are enabled. */
1081+
gctUINT32 activeClusterCount;
1082+
}VSC_SPECIFIC_HW_SETTING;
1083+
10251084
/* In general, a core system context is maintained by the driver's adapter/device, which can
10261085
designate a GPU chip, which means the core system context is a GPU-wide global context. */
10271086
typedef struct _VSC_CORE_SYS_CONTEXT
10281087
{
10291088
/* Designates a target HW */
10301089
VSC_HW_CONFIG hwCfg;
10311090

1091+
/* Specific HW setting. */
1092+
VSC_SPECIFIC_HW_SETTING specificHwSetting;
1093+
10321094
/* VSC private data, maintained by vscCreatePrivateData and vscDestroyPrivateData */
10331095
VSC_PRIV_DATA_HANDLE hPrivData;
10341096
}VSC_CORE_SYS_CONTEXT, *PVSC_CORE_SYS_CONTEXT;
@@ -1384,7 +1446,7 @@ VSC_OCLImgLibKind vscGetOCLImgLibKindForHWCfg(
13841446

13851447
/* Return the max free reg count for this HW config. */
13861448
gctUINT
1387-
vscGetHWMaxFreeRegCount(
1449+
vscGetHWMaxFreeRegCountPerShader(
13881450
IN VSC_HW_CONFIG *pHwCfg
13891451
);
13901452

drivers/amlogic/npu/inc/drvi/gc_vsc_drvi_shader_priv_mapping.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ typedef enum SHS_PRIV_CONSTANT_KIND
5555
SHS_PRIV_CONSTANT_KIND_COMPUTE_GROUP_NUM_FOR_SINGLE_GPU = 25,
5656
SHS_PRIV_CONSTANT_KIND_VIEW_INDEX = 26,
5757
SHS_PRIV_CONSTANT_KIND_DEFAULT_UBO_ADDRESS = 27,
58+
SHS_PRIV_CONSTANT_KIND_THREAD_ID_MEM_ADDR = 28,
59+
SHS_PRIV_CONSTANT_KIND_ENQUEUED_LOCAL_SIZE = 29,
5860
SHS_PRIV_CONSTANT_KIND_COUNT, /* last member, add new kind before this */
5961
}SHS_PRIV_CONSTANT_KIND;
6062

drivers/amlogic/npu/inc/drvi/gc_vsc_drvi_shader_profile.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,10 @@ typedef enum SHADER_IO_USAGE
188188
/* A special usage which means IO is used by general purpose */
189189
SHADER_IO_USAGE_GENERAL = 43,
190190

191-
/* Add NEW usages here */
191+
/* For GPGPU client only */
192+
SHADER_IO_USAGE_CLUSTER_ID = 44,
193+
194+
/* Add NEW usages before here, and make sure to update strUsageName too. */
192195

193196
/* Must be at last!!!!!!! */
194197
SHADER_IO_USAGE_TOTAL_COUNT,
@@ -200,6 +203,7 @@ SHADER_IO_USAGE;
200203
((usage) == SHADER_IO_USAGE_ISFRONTFACE) || \
201204
((usage) == SHADER_IO_USAGE_SAMPLE_MASK) || \
202205
((usage) == SHADER_IO_USAGE_SAMPLE_POSITION) || \
206+
((usage) == SHADER_IO_USAGE_CLUSTER_ID) || \
203207
((usage) >= SHADER_IO_USAGE_POINT_COORD && (usage) <= SHADER_IO_USAGE_INSTANCING_ID))
204208

205209
#define IS_SHADER_IO_USAGE_SIV(usage) \
@@ -1003,7 +1007,10 @@ typedef struct SHADER_EXECUTABLE_NATIVE_HINTS
10031007

10041008
gctUINT texldHint : 1;
10051009

1006-
gctUINT reserved : 19;
1010+
/* Active cluster count, 4 bits should be enough. */
1011+
gctUINT activeClusterCount: 4;
1012+
1013+
gctUINT reserved : 15;
10071014
} globalStates;
10081015

10091016
union

0 commit comments

Comments
 (0)