diff --git a/backends/xnnpack/operators/__init__.py b/backends/xnnpack/operators/__init__.py
index dcffa42ac3f..9d81b7f8e29 100644
--- a/backends/xnnpack/operators/__init__.py
+++ b/backends/xnnpack/operators/__init__.py
@@ -30,6 +30,7 @@
     op_minimum,
     op_multiply,
     op_negate,
+    op_permute,
     op_prelu,
     op_quantize_per_tensor,
     op_relu,
@@ -42,7 +43,6 @@
     op_squeeze,
     op_static_constant_pad,
     op_static_resize_bilinear_2d,
-    op_static_transpose,
     op_sub,
     op_to_copy,
 )
diff --git a/backends/xnnpack/operators/op_static_transpose.py b/backends/xnnpack/operators/op_permute.py
similarity index 97%
rename from backends/xnnpack/operators/op_static_transpose.py
rename to backends/xnnpack/operators/op_permute.py
index ce1cd43c1ad..0ca92a7a039 100644
--- a/backends/xnnpack/operators/op_static_transpose.py
+++ b/backends/xnnpack/operators/op_permute.py
@@ -20,7 +20,7 @@
 
 
 @register_node_visitor
-class StaticTransposeVisitor(NodeVisitor):
+class PermuteVisitor(NodeVisitor):
     target = "aten.permute_copy.default"
 
     def __init__(self, *args) -> None:
diff --git a/backends/xnnpack/operators/op_skip_ops.py b/backends/xnnpack/operators/op_skip_ops.py
index 83b6eee32b0..345b7896d34 100644
--- a/backends/xnnpack/operators/op_skip_ops.py
+++ b/backends/xnnpack/operators/op_skip_ops.py
@@ -113,12 +113,3 @@ class OpSymSizeInt(OpSkipOps):
     """
     do nothing if node is sym_size.int
     """
 
     target = "sym_size.int"
-
-
-@register_node_visitor
-class OpPermuteCopyDefault(OpSkipOps):
-    """
-    do nothing if node is permute_copy.default
-    """
-
-    target = "aten.permute_copy.default"
diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp
index 0c1c9e6d42c..8498bd84c5f 100644
--- a/backends/xnnpack/runtime/XNNCompiler.cpp
+++ b/backends/xnnpack/runtime/XNNCompiler.cpp
@@ -1517,6 +1517,7 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
   if (!executor->qinputs_.empty() && flatbuffer_graph->xnodes()->size() > 0 &&
       flatbuffer_graph->xnodes()->Get(0)->xnode_union_type() ==
           fb_xnnpack::XNodeUnion::XNNFullyConnected) {
+#ifdef ENABLE_DYNAMIC_QUANTIZATION
     // This delegate is for DQLinear which supports dynamic input shapes
     if (executor->getNumInputs() < 1 || executor->getNumOutputs() != 1) {
       ET_LOG(
@@ -1525,6 +1526,10 @@
       return Error::NotSupported;
     }
     executor->setNeedsResizeOutput();
+#else
+    ET_LOG(Error, "DQ Linear is not supported");
+    return Error::NotSupported;
+#endif
   }
 
   return err;
diff --git a/backends/xnnpack/runtime/XNNExecutor.cpp b/backends/xnnpack/runtime/XNNExecutor.cpp
index 5e39c86c1ba..30b60ee329d 100644
--- a/backends/xnnpack/runtime/XNNExecutor.cpp
+++ b/backends/xnnpack/runtime/XNNExecutor.cpp
@@ -7,7 +7,9 @@
  */
 
 #include <executorch/backends/xnnpack/runtime/XNNExecutor.h>
+#ifdef ENABLE_DYNAMIC_QUANTIZATION
 #include <executorch/backends/xnnpack/runtime/utils/utils.h>
+#endif
 
 namespace torch {
 namespace executor {
@@ -17,6 +19,7 @@ namespace delegate {
 Error XNNExecutor::set_external_input(uint32_t id, Tensor* input) {
   auto qinput_pair = qinputs_.find(id);
   if (qinput_pair != qinputs_.end()) {
+#ifdef ENABLE_DYNAMIC_QUANTIZATION
     auto qinput = qinput_pair->second;
     // dq the input and copy it in to qinput
     float input_min, input_max;
@@ -60,6 +63,10 @@ Error XNNExecutor::set_external_input(uint32_t id, Tensor* input) {
         {static_cast<float>(input_qparam.scale),
          static_cast<int32_t>(input_qparam.zero_point)},
         batch_size});
+#else
+    ET_LOG(Error, "Dynamic Quantization is not supported");
+    return Error::NotSupported;
+#endif
   } else {
     externals_.emplace_back(xnn_external_value{id, input->mutable_data_ptr()});
   }
diff --git a/backends/xnnpack/targets.bzl b/backends/xnnpack/targets.bzl
index 6df70f654aa..0f80af77cd0 100644
--- a/backends/xnnpack/targets.bzl
+++ b/backends/xnnpack/targets.bzl
@@ -65,6 +65,7 @@ def define_common_targets():
             "//executorch/extension/pybindings/...",
             "@EXECUTORCH_CLIENTS",
         ],
+        preprocessor_flags = [] if runtime.is_oss else ["-DENABLE_DYNAMIC_QUANTIZATION"],
        deps = [
            third_party_dep("XNNPACK"),
            ":xnnpack_schema",
diff --git a/backends/xnnpack/test/ops/add.py b/backends/xnnpack/test/ops/add.py
index ee19be67cdd..fe7686d1f99 100644
--- a/backends/xnnpack/test/ops/add.py
+++ b/backends/xnnpack/test/ops/add.py
@@ -75,9 +75,9 @@
     def test_add_quantized_pt2e(self):
         (
             Tester(add_module, model_inputs)
+            .quantize2()
             .export()
             .check_count({"torch.ops.aten.add.Tensor": 4})
-            .quantize2()
             .check(["torch.ops.quantized_decomposed"])
             .to_edge()
             .check_count({"executorch_exir_dialects_edge__ops_aten_add_Tensor": 4})
diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py
index 23602bde8bf..a736284bd9d 100644
--- a/backends/xnnpack/test/tester/tester.py
+++ b/backends/xnnpack/test/tester/tester.py
@@ -10,6 +10,7 @@
 from typing import Any, Dict, List, Optional, Tuple
 
 import torch
+import torch._export as export
 from executorch import exir
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
     XnnpackFloatingPointPartitioner,
@@ -145,23 +146,23 @@ def __init__(
 
         self.quantizer.set_global(self.quantization_config)
 
-        self.converted_program = None
+        self.converted_graph = None
 
     def run(
-        self, artifact: ExirExportedProgram, inputs: Optional[Tuple[torch.Tensor]]
+        self, artifact: torch.nn.Module, inputs: Optional[Tuple[torch.Tensor]]
     ) -> None:
-        prepared = prepare_pt2e(artifact.exported_program.graph_module, self.quantizer)
+        captured_graph = export.capture_pre_autograd_graph(artifact, inputs)
+        prepared = prepare_pt2e(captured_graph, self.quantizer)
         converted = convert_pt2e(prepared)
-        artifact.exported_program._graph_module = converted
-        self.converted_program = artifact
+        self.converted_graph = converted
 
     @property
-    def artifact(self) -> ExirExportedProgram:
-        return self.converted_program
+    def artifact(self) -> torch.fx.GraphModule:
+        return self.converted_graph
 
     @property
     def graph_module(self) -> str:
-        return self.converted_program.exported_program.graph_module
+        return self.converted_graph
 
 
 @register_stage
@@ -274,12 +275,11 @@
         self.inputs = inputs
         self.stages: Dict[str, Stage] = OrderedDict.fromkeys(list(_stages_.keys()))
         self.pipeline = {
+            self._stage_name(Quantize2): [self._stage_name(Export)],
             self._stage_name(Quantize): [self._stage_name(Export)],
             self._stage_name(Export): [
-                self._stage_name(Quantize2),
                 self._stage_name(ToEdge),
             ],
-            self._stage_name(Quantize2): [self._stage_name(ToEdge)],
             self._stage_name(ToEdge): [self._stage_name(Partition)],
             # TODO Make this Stage optional
             self._stage_name(Partition): [self._stage_name(ToExecutorch)],
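Note: with the Quantize2 stage reordered ahead of Export, tests drive the Tester against the eager module first; quantize2() now captures the pre-autograd graph itself via torch._export.capture_pre_autograd_graph before prepare_pt2e/convert_pt2e. A minimal sketch of the resulting flow, assuming the Tester API shown above (the AddModule module, its inputs, and the partition()/to_executorch() method names are illustrative assumptions, not taken from this diff):

    import torch

    from executorch.backends.xnnpack.test.tester.tester import Tester


    class AddModule(torch.nn.Module):  # hypothetical example module
        def forward(self, x, y):
            return x + y


    (
        Tester(AddModule(), (torch.randn(2, 3), torch.randn(2, 3)))
        .quantize2()  # runs on the eager module and captures the graph itself
        .export()     # export now sees the already-converted (quantized) graph
        .to_edge()
        .partition()      # assumed method for the Partition stage
        .to_executorch()  # assumed method for the ToExecutorch stage
    )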