diff --git a/backends/xnnpack/operators/__init__.py b/backends/xnnpack/operators/__init__.py
index dcffa42ac3f..9d81b7f8e29 100644
--- a/backends/xnnpack/operators/__init__.py
+++ b/backends/xnnpack/operators/__init__.py
@@ -30,6 +30,7 @@
     op_minimum,
     op_multiply,
     op_negate,
+    op_permute,
     op_prelu,
     op_quantize_per_tensor,
     op_relu,
@@ -42,7 +43,6 @@
     op_squeeze,
     op_static_constant_pad,
     op_static_resize_bilinear_2d,
-    op_static_transpose,
     op_sub,
     op_to_copy,
 )
diff --git a/backends/xnnpack/operators/op_static_transpose.py b/backends/xnnpack/operators/op_permute.py
similarity index 97%
rename from backends/xnnpack/operators/op_static_transpose.py
rename to backends/xnnpack/operators/op_permute.py
index ce1cd43c1ad..0ca92a7a039 100644
--- a/backends/xnnpack/operators/op_static_transpose.py
+++ b/backends/xnnpack/operators/op_permute.py
@@ -20,7 +20,7 @@
 
 
 @register_node_visitor
-class StaticTransposeVisitor(NodeVisitor):
+class PermuteVisitor(NodeVisitor):
     target = "aten.permute_copy.default"
 
     def __init__(self, *args) -> None:
diff --git a/backends/xnnpack/operators/op_skip_ops.py b/backends/xnnpack/operators/op_skip_ops.py
index 83b6eee32b0..345b7896d34 100644
--- a/backends/xnnpack/operators/op_skip_ops.py
+++ b/backends/xnnpack/operators/op_skip_ops.py
@@ -113,12 +113,3 @@ class OpSymSizeInt(OpSkipOps):
     """
     do nothing if node is sym_size.int
     """
 
     target = "sym_size.int"
-
-
-@register_node_visitor
-class OpPermuteCopyDefault(OpSkipOps):
-    """
-    do nothing if node is permute_copy.default
-    """
-
-    target = "aten.permute_copy.default"
diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp
index 0c1c9e6d42c..8498bd84c5f 100644
--- a/backends/xnnpack/runtime/XNNCompiler.cpp
+++ b/backends/xnnpack/runtime/XNNCompiler.cpp
@@ -1517,6 +1517,7 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
   if (!executor->qinputs_.empty() && flatbuffer_graph->xnodes()->size() > 0 &&
       flatbuffer_graph->xnodes()->Get(0)->xnode_union_type() ==
           fb_xnnpack::XNodeUnion::XNNFullyConnected) {
+#ifdef ENABLE_DYNAMIC_QUANTIZATION
     // This delegate is for DQLinear which supports dynamic input shapes
     if (executor->getNumInputs() < 1 || executor->getNumOutputs() != 1) {
       ET_LOG(
@@ -1525,6 +1526,10 @@
       return Error::NotSupported;
     }
     executor->setNeedsResizeOutput();
+#else
+    ET_LOG(Error, "DQ Linear is not supported");
+    return Error::NotSupported;
+#endif
   }
 
   return err;
diff --git a/backends/xnnpack/runtime/XNNExecutor.cpp b/backends/xnnpack/runtime/XNNExecutor.cpp
index 5e39c86c1ba..30b60ee329d 100644
--- a/backends/xnnpack/runtime/XNNExecutor.cpp
+++ b/backends/xnnpack/runtime/XNNExecutor.cpp
@@ -7,7 +7,9 @@
  */
 
 #include <executorch/backends/xnnpack/runtime/XNNExecutor.h>
+#ifdef ENABLE_DYNAMIC_QUANTIZATION
 #include <executorch/backends/xnnpack/runtime/utils/utils.h>
+#endif
 
 namespace torch {
 namespace executor {
@@ -17,6 +19,7 @@ namespace delegate {
 Error XNNExecutor::set_external_input(uint32_t id, Tensor* input) {
   auto qinput_pair = qinputs_.find(id);
   if (qinput_pair != qinputs_.end()) {
+#ifdef ENABLE_DYNAMIC_QUANTIZATION
     auto qinput = qinput_pair->second;
     // dq the input and copy it in to qinput
     float input_min, input_max;
@@ -60,6 +63,10 @@ Error XNNExecutor::set_external_input(uint32_t id, Tensor* input) {
         {static_cast<float>(input_qparam.scale),
          static_cast<int32_t>(input_qparam.zero_point)},
         batch_size});
+#else
+    ET_LOG(Error, "Dynamic Quantization is not supported");
+    return Error::NotSupported;
+#endif
   } else {
     externals_.emplace_back(xnn_external_value{id, input->mutable_data_ptr()});
   }
diff --git a/backends/xnnpack/targets.bzl b/backends/xnnpack/targets.bzl
index 6df70f654aa..0f80af77cd0 100644
--- a/backends/xnnpack/targets.bzl
+++ b/backends/xnnpack/targets.bzl
@@ -65,6 +65,7 @@ def define_common_targets():
             "//executorch/extension/pybindings/...",
             "@EXECUTORCH_CLIENTS",
         ],
+        preprocessor_flags = [] if runtime.is_oss else ["-DENABLE_DYNAMIC_QUANTIZATION"],
        deps = [
            third_party_dep("XNNPACK"),
            ":xnnpack_schema",
diff --git a/backends/xnnpack/test/ops/add.py b/backends/xnnpack/test/ops/add.py
index ee19be67cdd..fe7686d1f99 100644
--- a/backends/xnnpack/test/ops/add.py
+++ b/backends/xnnpack/test/ops/add.py
@@ -75,9 +75,9 @@
     def test_add_quantized_pt2e(self):
         (
             Tester(add_module, model_inputs)
+            .quantize2()
             .export()
             .check_count({"torch.ops.aten.add.Tensor": 4})
-            .quantize2()
             .check(["torch.ops.quantized_decomposed"])
             .to_edge()
             .check_count({"executorch_exir_dialects_edge__ops_aten_add_Tensor": 4})
diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py
index 23602bde8bf..a736284bd9d 100644
--- a/backends/xnnpack/test/tester/tester.py
+++ b/backends/xnnpack/test/tester/tester.py
@@ -10,6 +10,7 @@
 from typing import Any, Dict, List, Optional, Tuple
 
 import torch
+import torch._export as export
 from executorch import exir
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
     XnnpackFloatingPointPartitioner,
@@ -145,23 +146,23 @@ def __init__(
 
         self.quantizer.set_global(self.quantization_config)
 
-        self.converted_program = None
+        self.converted_graph = None
 
     def run(
-        self, artifact: ExirExportedProgram, inputs: Optional[Tuple[torch.Tensor]]
+        self, artifact: torch.nn.Module, inputs: Optional[Tuple[torch.Tensor]]
     ) -> None:
-        prepared = prepare_pt2e(artifact.exported_program.graph_module, self.quantizer)
+        captured_graph = export.capture_pre_autograd_graph(artifact, inputs)
+        prepared = prepare_pt2e(captured_graph, self.quantizer)
         converted = convert_pt2e(prepared)
-        artifact.exported_program._graph_module = converted
-        self.converted_program = artifact
+        self.converted_graph = converted
 
     @property
-    def artifact(self) -> ExirExportedProgram:
-        return self.converted_program
+    def artifact(self) -> torch.fx.GraphModule:
+        return self.converted_graph
 
     @property
     def graph_module(self) -> str:
-        return self.converted_program.exported_program.graph_module
+        return self.converted_graph
 
 
 @register_stage
@@ -274,12 +275,11 @@
         self.inputs = inputs
         self.stages: Dict[str, Stage] = OrderedDict.fromkeys(list(_stages_.keys()))
         self.pipeline = {
+            self._stage_name(Quantize2): [self._stage_name(Export)],
             self._stage_name(Quantize): [self._stage_name(Export)],
             self._stage_name(Export): [
-                self._stage_name(Quantize2),
                 self._stage_name(ToEdge),
             ],
-            self._stage_name(Quantize2): [self._stage_name(ToEdge)],
             self._stage_name(ToEdge): [self._stage_name(Partition)],
             # TODO Make this Stage optional
             self._stage_name(Partition): [self._stage_name(ToExecutorch)],
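Note: with the Quantize2 stage reordered ahead of Export, tests drive the Tester against the eager module first; quantize2() now captures the pre-autograd graph itself via torch._export.capture_pre_autograd_graph before prepare_pt2e/convert_pt2e. A minimal sketch of the resulting flow, assuming the Tester API shown above (the AddModule module, its inputs, and the partition()/to_executorch() method names are illustrative assumptions, not taken from this diff):

    import torch

    from executorch.backends.xnnpack.test.tester.tester import Tester


    class AddModule(torch.nn.Module):  # hypothetical example module
        def forward(self, x, y):
            return x + y


    (
        Tester(AddModule(), (torch.randn(2, 3), torch.randn(2, 3)))
        .quantize2()  # runs on the eager module and captures the graph itself
        .export()     # export now sees the already-converted (quantized) graph
        .to_edge()
        .partition()      # assumed method for the Partition stage
        .to_executorch()  # assumed method for the ToExecutorch stage
    )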