
Commit 551c6fd

headings and subheadings.

1 parent 702b218 commit 551c6fd


recipes_source/regional_aot.py

Lines changed: 17 additions & 13 deletions
@@ -32,8 +32,6 @@
 This feature is available starting with the 2.6 release.
 """
 
-from time import perf_counter
-
 ######################################################################
 # Steps
 # -----
@@ -46,13 +44,16 @@
 #
 # First, let's import the necessary libraries for loading our data:
 #
-#
 
 import torch
 torch.set_grad_enabled(False)
 
+from time import perf_counter
 
-##########################################################
+###################################################################################
+# Defining the Neural Network
+# ---------------------------
+#
 # We will use the same neural network structure as the regional compilation recipe.
 #
 # We will use a network, composed of repeated layers. This mimics a
@@ -92,7 +93,10 @@ def forward(self, x):
         return x
 
 
-####################################################
+##################################################################################
+# Compiling the model ahead-of-time
+# ---------------------------------
+#
 # Since we're compiling the model ahead-of-time, we need to prepare representative
 # input examples, that we expect the model to see during actual deployments.
 #
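For readers following along outside the diff: the recipe's network is a stack of identical blocks, which is what makes per-region compilation worthwhile. A minimal sketch of that shape (the block contents, sizes, and names here are illustrative assumptions, not the recipe's exact definition):

import torch

class Block(torch.nn.Module):
    # A hypothetical repeated block; the recipe's actual layers may differ.
    def __init__(self, dim=10):
        super().__init__()
        self.linear = torch.nn.Linear(dim, dim)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        return self.relu(self.linear(x))

class Model(torch.nn.Module):
    # Repeating one block many times mimics transformer-style architectures.
    def __init__(self, num_blocks=8, dim=10):
        super().__init__()
        self.blocks = torch.nn.ModuleList(Block(dim) for _ in range(num_blocks))

    def forward(self, x):
        for block in self.blocks:
            x = block(x)
        return x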
@@ -104,22 +108,25 @@ def forward(self, x):
 output = model(input)
 print(f"{output.shape=}")
 
-####################################################
+###############################################################################################
 # Now, let's compile our model ahead-of-time. We will use ``input`` created above to pass
 # to ``torch.export``. This will yield a ``torch.export.ExportedProgram`` which we can compile.
 
 path = torch._inductor.aoti_compile_and_package(
     torch.export.export(model, args=(input,))
 )
 
-####################################################
+#################################################################
 # We can load from this ``path`` and use it to perform inference.
 
 compiled_binary = torch._inductor.aoti_load_package(path)
 output_compiled = compiled_binary(input)
 print(f"{output_compiled.shape=}")
 
-###################################################
+######################################################################################
+# Compiling _regions_ of the model ahead-of-time
+# ----------------------------------------------
+#
 # Compiling model regions ahead-of-time, on the other hand, requires a few key changes.
 #
 # Since the compute pattern is shared by all the blocks that
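Taken together, the full-model path documented in this hunk amounts to the following sketch. It uses only the calls that appear in the diff above; ``Model`` and the input shape are carried over from the illustrative sketch earlier, not the recipe's exact values:

import torch

torch.set_grad_enabled(False)

model = Model()              # illustrative Model from the sketch above
input = torch.randn(10, 10)  # a representative input example

# Export the model, then AOT-compile and package it to a file on disk.
path = torch._inductor.aoti_compile_and_package(
    torch.export.export(model, args=(input,))
)

# In a later process (e.g., at deployment), load the package and run inference.
compiled_binary = torch._inductor.aoti_load_package(path)
output = compiled_binary(input)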
@@ -166,13 +173,10 @@ def forward(self, x):
 # reducing the cold start times.
 
 ###################################################
+# Measuring compilation time
+# --------------------------
 # Next, let's measure the compilation time of the full model and the regional compilation.
 #
-# ``torch.compile`` is a JIT compiler, which means that it compiles on the first invocation.
-# In the code below, we measure the total time spent in the first invocation. While this method is not
-# precise, it provides a good estimate since the majority of the time is spent in
-# compilation.
-
 
 def measure_compile_time(input, regional=False):
     start = perf_counter()
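The hunk cuts ``measure_compile_time`` off after its first line. One plausible shape for such a helper, using only calls shown elsewhere in this file (the helper below covers only the full-model path and is an assumption, not the recipe's actual body):

from time import perf_counter

import torch

def measure_full_model_compile_time(model, input):
    # aoti_compile_and_package compiles ahead-of-time, so wrapping the call in
    # perf_counter() measures compilation cost directly; no warm-up inference
    # pass is needed, unlike timing the JIT torch.compile path.
    start = perf_counter()
    torch._inductor.aoti_compile_and_package(
        torch.export.export(model, args=(input,))
    )
    return perf_counter() - start

# Example usage with the illustrative Model from above:
# full_time = measure_full_model_compile_time(Model(), torch.randn(10, 10))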
