Skip to content

Commit 7b367bd

Browse files
committed
Qualcomm AI Engine Direct - Add QNN support for to_edge_transform_and_lower
summary: - Support `to_edge_transform_and_lower` - Replace capture_program with new API `to_edge_transform_and_lower_to_qnn` - Replace capture_program with to_edge_transform_and_lower_to_qnn for unit_test - Replace capture_program with to_edge_transform_and_lower_to_qnn for examples - Replace capture_program with to_edge_transform_and_lower_to_qnn for llama - Add QnnPassManager to manage all passes in different stages - Deprecate _transform in export_llama_lib with qnn_pass_manager - Add transform_for_export_pipeline for LiftConstantScalarOperands to avoid creating temporary tensors in the operation builder. However, this pass will create a get_attr node, which should be converted into a lifted tensor constant by the lift_constant_tensor_pass. If placed in the to_edge_transform_passes, it will be executed after the lift_constant_tensor_pass, causing the operation builder to fail to correctly retrieve the parameter via get_parameter for the get_attr node. - Refactor the passes - Fix the output dtype mismatch at runtime after build quant io - Combine constant_i64_to_i32 and tensor_i64_to_i32 into i64_to_i32 - Replace convert_to_linear pass with fixed_linear_keep_dim pass - Since QNN has no keep-dims support for the linear op, we need to add squeeze and unsqueeze around the linear node - Add TagQuantIO pass to tag io nodes to avoid inserting q/dq in qnn_preprocess - Add prelu, leaky_relu, linear, rms_norm into decompose_table - Remove recompose_prelu.py - Remove unused variables in insert_requantize.py and replace_index_put_input.py - Support aten.split_with_sizes_copy.default - Support leaky_relu with inplace=True
1 parent 7159650 commit 7b367bd

33 files changed

+967
-1179
lines changed

backends/qualcomm/_passes/__init__.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,41 @@
11
from .annotate_decomposed import AnnotateDecomposed
22
from .annotate_quant_attrs import AnnotateQuantAttrs
3-
from .constant_i64_to_i32 import ConstantI64toI32
43
from .convert_bmm_to_matmul import ConvertBmmToMatmul
5-
from .convert_to_linear import ConvertToLinear
64
from .decompose_any import DecomposeAny
75
from .decompose_einsum import DecomposeEinsum
86
from .decompose_linalg_vector_norm import DecomposeLinalgVectorNorm
97
from .decompose_silu import DecomposeSilu
108
from .expand_broadcast_tensor_shape import ExpandBroadcastTensorShape
9+
from .fixed_linear_keep_dim import FixedLinearKeepDim
1110
from .fold_qdq import FoldQDQ
1211
from .fuse_consecutive_transpose import FuseConsecutiveTranspose
12+
from .i64_to_i32 import I64toI32
1313
from .insert_io_qdq import InsertIOQDQ
1414
from .insert_requantize import InsertRequantize
1515
from .layout_transform import LayoutTransform
1616
from .lift_constant_scalar_operands import LiftConstantScalarOperands
1717
from .recompose_pixel_unshuffle import RecomposePixelUnshuffle
18-
from .recompose_prelu import RecomposePReLU
1918
from .recompose_rms_norm import RecomposeRmsNorm
2019
from .reduce_dynamic_range import ReduceDynamicRange
2120
from .remove_redundancy import RemoveRedundancy
2221
from .replace_index_put_input import ReplaceIndexPutInput
2322
from .replace_inf_buffer import ReplaceInfBuffer
24-
from .tensor_i64_to_i32 import TensorI64toI32
23+
from .tag_quant_io import TagQuantIO
2524

2625

2726
__all__ = [
2827
AnnotateDecomposed,
2928
AnnotateQuantAttrs,
30-
ConstantI64toI32,
3129
ConvertBmmToMatmul,
32-
RecomposePReLU,
33-
ConvertToLinear,
3430
DecomposeAny,
3531
DecomposeEinsum,
3632
DecomposeLinalgVectorNorm,
3733
DecomposeSilu,
3834
ExpandBroadcastTensorShape,
35+
FixedLinearKeepDim,
3936
FoldQDQ,
4037
FuseConsecutiveTranspose,
38+
I64toI32,
4139
InsertIOQDQ,
4240
InsertRequantize,
4341
LayoutTransform,
@@ -48,5 +46,5 @@
4846
RemoveRedundancy,
4947
ReplaceIndexPutInput,
5048
ReplaceInfBuffer,
51-
TensorI64toI32,
49+
TagQuantIO,
5250
]

backends/qualcomm/_passes/build_quant_io.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,25 +27,17 @@ def _make_spec(self, x):
2727
return None
2828

2929
def _build(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
30-
# forcely update delegate node's meta['spec'] to get correct output
30+
# Forcedly update delegate node's meta['spec'] to get correct output
3131
# tensor size in runtime
3232
call_delegate = [
3333
node
3434
for node in graph_module.graph.nodes
3535
if node.op == "call_function" and node.name == "executorch_call_delegate"
3636
]
3737
assert len(call_delegate) == 1
38-
spec = []
3938
for n in graph_module.graph.nodes:
4039
if QCOM_QUANTIZED_IO in n.meta:
4140
n.meta["val"] = n.meta["val"].to(dtype=n.meta[QCOM_QUANTIZED_IO])
42-
if n.op == "call_function" and "getitem" in n.name:
43-
fake_tensor = n.meta["val"]
44-
if QCOM_QUANTIZED_IO in n.meta:
45-
fake_tensor = fake_tensor.to(dtype=n.meta[QCOM_QUANTIZED_IO])
46-
spec.append(self._make_spec(fake_tensor))
47-
48-
call_delegate[0].meta["spec"] = tuple(spec)
4941

5042
def call(self, graph_module: torch.fx.GraphModule):
5143
self._build(graph_module)

backends/qualcomm/_passes/constant_i64_to_i32.py

Lines changed: 0 additions & 81 deletions
This file was deleted.

backends/qualcomm/_passes/convert_to_linear.py

Lines changed: 0 additions & 231 deletions
This file was deleted.

0 commit comments

Comments
 (0)