task: text-classification
Backend: sagemaker-training
Backend args: {'instance_type': 'ml.m5.2xlarge', 'supported_instructions': 'avx512'}
Number of evaluation samples: entire dataset
Fixed parameters:
- dataset: [{'path': 'glue', 'eval_split': 'validation', 'data_keys': {'primary': 'sentence'}, 'ref_keys': ['label'], 'name': 'sst2', 'calibration_split': 'train'}]
- name_or_path: distilbert-base-uncased-finetuned-sst-2-english
- from_transformers: True
- calibration:
  - method: percentile
  - num_calibration_samples: 128
  - calibration_histogram_percentile: 99.999
- method:
Benchmarked parameters:
- framework: onnxruntime, pytorch
- quantization_approach: dynamic, static
- operators_to_quantize: ['Add', 'MatMul'], ['Add']
- node_exclusion: [], ['layernorm', 'gelu', 'residual', 'gather', 'softmax']
- per_channel: False, True
- framework_args: {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4}, {}
- reduce_range: True, False
- apply_quantization: True, False
Evaluation
Non-time metrics
framework | quantization_approach | operators_to_quantize | node_exclusion | per_channel | framework_args | reduce_range | apply_quantization | accuracy |
---|---|---|---|---|---|---|---|---|
onnxruntime | None | None | None | None | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | None | False | 0.911 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.898 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.893 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.490 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.901 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.898 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.893 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.490 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.901 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.911 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.911 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.911 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.911 |
onnxruntime | dynamic | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.911 |
onnxruntime | dynamic | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.911 |
onnxruntime | dynamic | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.911 |
onnxruntime | dynamic | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.911 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.899 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.899 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.491 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.908 |
onnxruntime | static | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.899 |
onnxruntime | static | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.899 |
onnxruntime | static | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.499 |
onnxruntime | static | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.900 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.906 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.906 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.906 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.906 |
onnxruntime | static | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.901 |
onnxruntime | static | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.901 |
onnxruntime | static | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 0.901 |
onnxruntime | static | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 0.901 |
pytorch | None | None | None | None | {} | None | None | 0.911 |
Time metrics
Time benchmarks were run for 15 seconds per config.
Below, time metrics for batch size = 1, input length = 32.
framework | quantization_approach | operators_to_quantize | node_exclusion | per_channel | framework_args | reduce_range | apply_quantization | latency_mean (ms) | throughput (/s) |
---|---|---|---|---|---|---|---|---|---|
onnxruntime | None | None | None | None | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | None | False | 14.50 | 69.00 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 10.19 | 98.13 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 10.66 | 93.87 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 10.45 | 95.67 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 10.72 | 93.33 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 10.40 | 96.20 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 10.16 | 98.40 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 10.40 | 96.20 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 10.86 | 92.07 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 14.43 | 69.33 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 14.68 | 68.13 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 14.40 | 69.47 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 14.79 | 67.60 |
onnxruntime | dynamic | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 14.80 | 67.60 |
onnxruntime | dynamic | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 14.13 | 70.80 |
onnxruntime | dynamic | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 14.54 | 68.80 |
onnxruntime | dynamic | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 14.60 | 68.53 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 11.23 | 89.13 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 11.18 | 89.47 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 11.39 | 87.87 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 11.31 | 88.47 |
onnxruntime | static | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 13.73 | 72.87 |
onnxruntime | static | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 14.42 | 69.40 |
onnxruntime | static | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 14.09 | 71.00 |
onnxruntime | static | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 13.78 | 72.60 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 16.11 | 62.13 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 15.97 | 62.67 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 15.82 | 63.27 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 15.94 | 62.73 |
onnxruntime | static | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 19.03 | 52.60 |
onnxruntime | static | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 18.99 | 52.67 |
onnxruntime | static | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 18.93 | 52.87 |
onnxruntime | static | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 18.65 | 53.67 |
pytorch | None | None | None | None | {} | None | None | 31.28 | 32.00 |
Below, time metrics for batch size = 1, input length = 64.
framework | quantization_approach | operators_to_quantize | node_exclusion | per_channel | framework_args | reduce_range | apply_quantization | latency_mean (ms) | throughput (/s) |
---|---|---|---|---|---|---|---|---|---|
onnxruntime | None | None | None | None | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | None | False | 24.59 | 40.67 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 18.67 | 53.60 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 19.16 | 52.20 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 18.97 | 52.73 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 19.29 | 51.87 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 19.13 | 52.33 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 18.64 | 53.67 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 19.01 | 52.60 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 18.96 | 52.80 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 24.63 | 40.67 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 25.28 | 39.60 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 24.75 | 40.47 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 24.97 | 40.07 |
onnxruntime | dynamic | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 25.16 | 39.80 |
onnxruntime | dynamic | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 24.49 | 40.87 |
onnxruntime | dynamic | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 24.88 | 40.20 |
onnxruntime | dynamic | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 25.17 | 39.73 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 20.05 | 49.93 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 20.76 | 48.20 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 20.75 | 48.20 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 20.23 | 49.47 |
onnxruntime | static | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 24.79 | 40.40 |
onnxruntime | static | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 25.17 | 39.73 |
onnxruntime | static | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 24.14 | 41.47 |
onnxruntime | static | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 25.27 | 39.60 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 27.97 | 35.80 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 27.43 | 36.47 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 28.17 | 35.53 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 28.16 | 35.53 |
onnxruntime | static | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 33.24 | 30.13 |
onnxruntime | static | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 32.46 | 30.87 |
onnxruntime | static | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 32.39 | 30.93 |
onnxruntime | static | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 32.75 | 30.53 |
pytorch | None | None | None | None | {} | None | None | 41.25 | 24.27 |
Below, time metrics for batch size = 1, input length = 128.
framework | quantization_approach | operators_to_quantize | node_exclusion | per_channel | framework_args | reduce_range | apply_quantization | latency_mean (ms) | throughput (/s) |
---|---|---|---|---|---|---|---|---|---|
onnxruntime | None | None | None | None | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | None | False | 46.51 | 21.53 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 35.33 | 28.33 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 35.92 | 27.87 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 35.56 | 28.13 |
onnxruntime | dynamic | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 36.32 | 27.53 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 35.53 | 28.20 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 35.96 | 27.87 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 35.42 | 28.27 |
onnxruntime | dynamic | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 36.06 | 27.80 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 47.40 | 21.13 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 47.14 | 21.27 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 47.46 | 21.13 |
onnxruntime | dynamic | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 47.26 | 21.20 |
onnxruntime | dynamic | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 47.48 | 21.07 |
onnxruntime | dynamic | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 47.08 | 21.27 |
onnxruntime | dynamic | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 47.02 | 21.33 |
onnxruntime | dynamic | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 47.05 | 21.27 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 39.63 | 25.27 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 39.52 | 25.33 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 39.78 | 25.20 |
onnxruntime | static | ['Add', 'MatMul'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 40.01 | 25.00 |
onnxruntime | static | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 44.24 | 22.67 |
onnxruntime | static | ['Add', 'MatMul'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 44.55 | 22.47 |
onnxruntime | static | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 45.74 | 21.87 |
onnxruntime | static | ['Add', 'MatMul'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 44.12 | 22.67 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 51.41 | 19.47 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 52.52 | 19.07 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 51.25 | 19.53 |
onnxruntime | static | ['Add'] | ['layernorm', 'gelu', 'residual', 'gather', 'softmax'] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 51.51 | 19.47 |
onnxruntime | static | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 59.37 | 16.87 |
onnxruntime | static | ['Add'] | [] | False | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 58.28 | 17.20 |
onnxruntime | static | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | False | True | 59.37 | 16.87 |
onnxruntime | static | ['Add'] | [] | True | {'opset': 13, 'optimization_level': 1, 'intra_op_num_threads': 4} | True | True | 58.28 | 17.20 |
pytorch | None | None | None | None | {} | None | None | 53.72 | 18.67 |