You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
), "Only 4-bit per channel group quantization is supported"
239
+
return XNNDatatype.xnn_datatype_qbint4
240
+
else:
241
+
# 4/8-bit per channel quantized weights
242
+
return (
243
+
XNNDatatype.xnn_datatype_qcint4
244
+
if quant_params.is_qc4w
245
+
else XNNDatatype.xnn_datatype_qcint8
246
+
)
238
247
else:
239
248
raise RuntimeError(
240
249
f"Unable to resolve static quantized tensor dtype using quant params dtype: {quant_params.dtype}, [qmin, qmax]: {quant_params.qmin}, {quant_params.qmax} for per channel quantization"
), "For per_channel_group quant, expecting output channels to match scale.shape[0], but got: {output_channels}, scale.shape[0]: {quant_params.scale.shape[0]}"
326
+
assert (
327
+
input_channels % num_groups == 0
328
+
), "For per_channel_group quant, expecting input channels to be divisible by num_groups, but got ic: {input_channels}, num_groups: {num_groups}"
329
+
assert (
330
+
input_channels % quant_params.group_size == 0
331
+
), "For per_channel_group quant, expecting input channels to be divisible by group_size, but got ic: {input_channels}, group_size: {quant_params.group_size}"
0 commit comments