[LLGA] add RECORD_FUNCTION in bridge code (#143)

chunyuan-w · web-flow · commit ccf8cb8f23ca · 2021-08-06T12:58:20.000+08:00
diff --git a/torch_ipex/csrc/jit/codegen/onednn/interface.cpp b/torch_ipex/csrc/jit/codegen/onednn/interface.cpp
@@ -91,7 +91,9 @@ void fuseGraph(std::shared_ptr<Graph> &g) {
 Operation createLlgaKernel(const Node *node) {
   auto kernel = std::make_shared<fuser::onednn::LlgaKernel>(node);
   return [kernel](Stack *stack) {
+#if defined(IPEX_PROFILE_OP)
     RECORD_FUNCTION(kernel->profileName(), std::vector<c10::IValue>());
+#endif
     kernel->run(*stack);
     return 0;
   };
@@ -105,6 +107,9 @@ RegisterOperators LLGAFusionGroupOp({
 
 Operation createLlgaGuardKernel(const Node *node) {
   return [node](Stack *stack) {
+#if defined(IPEX_PROFILE_OP)
+    RECORD_FUNCTION(fuser::onednn::LlgaGuardName(), std::vector<c10::IValue>());
+#endif
     GRAPH_DEBUG("Guarding node: ", node->kind().toQualString());
     std::vector<TypePtr> types = node->tys(attr::types);
     const auto num_inputs = types.size();
diff --git a/torch_ipex/csrc/jit/codegen/onednn/kernel.cpp b/torch_ipex/csrc/jit/codegen/onednn/kernel.cpp
@@ -124,6 +124,9 @@ ArgSpecs LlgaKernel::specializeOutputSpecs(const partition &partition,
 std::tuple<RunArgs, RunArgs>
 LlgaKernel::prepareRunArgs(const TensorArgs &inputs,
                            TensorArgs &outputs) const {
+#if defined(IPEX_PROFILE_OP)
+  RECORD_FUNCTION("LLGA_bridge::prepareRunArgs", std::vector<c10::IValue>({}));
+#endif
   RunArgs runInputs, runOutputs;
   for (size_t i = 0; i < nInputs_; i++) {
     auto spec = inputSpecs_[i];