This feature is available starting with the 2.6 release.
"""

######################################################################
# Steps
# -----
#
# First, let's import the necessary libraries:
#
import torch

torch.set_grad_enabled(False)  # this recipe only runs inference, so gradient tracking is not needed

from time import perf_counter

###################################################################################
# Defining the Neural Network
# ---------------------------
#
# We will use the same neural network structure as the regional compilation recipe.
#
# The network is composed of repeated layers. This mimics a
        return x


##################################################################################
# Compiling the model ahead-of-time
# ---------------------------------
#
# Since we're compiling the model ahead-of-time, we need to prepare representative
# input examples that we expect the model to see during actual deployments.
#
output = model(input)
print(f"{output.shape=}")

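##############################################################################
# A note on representative inputs: if some input dimension is expected to vary at
# deployment time (for example, the batch size), ``torch.export`` can mark that
# dimension as dynamic. The snippet below is only a sketch under assumptions that
# are not part of this recipe: the tensor shape and the forward-argument name
# ``x`` are illustrative placeholders.
#
# .. code-block:: python
#
#     from torch.export import Dim
#
#     batch = Dim("batch")                                # a symbolic, variable batch size
#     example_input = torch.randn(8, 10, device="cuda")   # hypothetical representative input
#     ep = torch.export.export(
#         model, args=(example_input,), dynamic_shapes={"x": {0: batch}}
#     )
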
###############################################################################################
# Now, let's compile our model ahead-of-time. We will pass the ``input`` created above
# to ``torch.export``. This yields a ``torch.export.ExportedProgram``, which we can then compile.
path = torch._inductor.aoti_compile_and_package(
    torch.export.export(model, args=(input,))
)
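#################################################################################
# If you want to inspect what was captured before it is compiled, you can keep the
# ``ExportedProgram`` in its own variable and print it first. This is optional and
# is simply the same call from above split into two steps:
#
# .. code-block:: python
#
#     exported = torch.export.export(model, args=(input,))
#     print(exported)  # shows the captured graph and its signature
#     path = torch._inductor.aoti_compile_and_package(exported)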
#################################################################
# We can load from this ``path`` and use it to perform inference.
compiled_binary = torch._inductor.aoti_load_package(path)
output_compiled = compiled_binary(input)
print(f"{output_compiled.shape=}")
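######################################################################
# If you like, you can also check that the AOT-compiled artifact matches the eager
# model numerically. Small differences are possible because the compiled kernels
# may reorder floating-point operations.

print(torch.allclose(output, output_compiled, atol=1e-4, rtol=1e-4))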
######################################################################################
# Compiling *regions* of the model ahead-of-time
# ----------------------------------------------
#
# Compiling model regions ahead-of-time, on the other hand, requires a few key changes.
#
# Since the compute pattern is shared by all the blocks that
# reducing the cold start times.
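###################################################################################
# To make the idea concrete, here is a minimal sketch of the pattern. It is not the
# exact code of this recipe: ``Block``, its sizes, and the loop that reuses the
# compiled region are simplified stand-ins, and a real model would still need each
# block's own weights to be handled rather than reusing the first block's parameters.
#
# .. code-block:: python
#
#     class Block(torch.nn.Module):
#         def __init__(self):
#             super().__init__()
#             self.linear = torch.nn.Linear(10, 10)
#
#         def forward(self, x):
#             return torch.nn.functional.relu(self.linear(x))
#
#     block = Block().cuda()
#     x = torch.randn(10, 10, device="cuda")
#
#     # Export and AOT-compile a single representative block once...
#     block_path = torch._inductor.aoti_compile_and_package(
#         torch.export.export(block, args=(x,))
#     )
#     compiled_block = torch._inductor.aoti_load_package(block_path)
#
#     # ...then reuse the compiled region for every repeated layer, instead of
#     # exporting and compiling the whole model end-to-end.
#     out = x
#     for _ in range(3):
#         out = compiled_block(out)
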
###################################################
# Measuring compilation time
# --------------------------
#
# Next, let's measure the compilation time of the full model and the regional compilation.
#
def measure_compile_time(input, regional=False):
    start = perf_counter()