4 changes: 2 additions & 2 deletions hls4ml/backends/vivado_accelerator/supported_boards.json
@@ -1,8 +1,8 @@
{
"pynq-z2": {
"part": "xc7z020clg400-1",
"tcl_scripts": {"axi_lite": "axi_lite_design.tcl", "axi_stream": "axi_stream_design.tcl"},
"python_drivers": {"axi_stream": "axi_stream_driver.py"},
"tcl_scripts": {"axi_lite": "axi_lite_design.tcl", "axi_stream": "axi_stream_design.tcl", "axi_master": "axi_master_design.tcl"},
"python_drivers": {"axi_stream": "axi_stream_driver.py", "axi_master": "axi_master_driver.py"},
"c_drivers": {}
},
"zcu102": {
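For reference, a minimal sketch of how the new `axi_master` entries can be resolved from `supported_boards.json`. The helper below is hypothetical (in hls4ml this lookup is handled by the accelerator config object used as `self.vivado_accelerator_config` in the writer changes further down); only the file layout and key names come from the diff.

```python
import json

def resolve_board_files(boards_json, board='pynq-z2', interface='axi_master'):
    """Illustrative lookup of the per-board build artifacts for a given interface."""
    with open(boards_json) as fp:
        boards = json.load(fp)
    cfg = boards[board]
    part = cfg['part']                             # e.g. 'xc7z020clg400-1'
    tcl = cfg['tcl_scripts'][interface]            # 'axi_master_design.tcl' for pynq-z2
    driver = cfg['python_drivers'].get(interface)  # 'axi_master_driver.py' for pynq-z2
    return part, tcl, driver
```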
8 changes: 8 additions & 0 deletions hls4ml/templates/vivado_accelerator/myproject_axi.cpp
@@ -9,6 +9,14 @@ void myproject(

//hls-fpga-machine-learning insert local vars

#ifndef __SYNTHESIS__
// C simulation only: load the weights from the .txt files on the first call
// (synthesis uses the values compiled into the generated weight headers instead).
static bool loaded_weights = false;
if (!loaded_weights) {
//hls-fpga-machine-learning insert load weights
loaded_weights = true;
}
#endif

//hls-fpga-machine-learning insert enqueue

//hls-fpga-machine-learning insert call
@@ -0,0 +1,76 @@
from pynq import DefaultHierarchy, DefaultIP, allocate
from pynq import Overlay
from datetime import datetime
import pynq.lib.dma
import numpy as np


class NeuralNetworkOverlay(Overlay):
    def __init__(self, bitfile_name, x_shape, y_shape, dtype=np.float32, dtbo=None, download=True, ignore_version=False,
                 device=None):
        super().__init__(bitfile_name, dtbo=dtbo, download=download, ignore_version=ignore_version, device=device)
        self.regin = self.myproject_axi_0.register_map.in_r.address
        self.regout = self.myproject_axi_0.register_map.out_r.address
        self.ctrl = self.myproject_axi_0.register_map.CTRL
        self.input_buffer = allocate(shape=x_shape, dtype=dtype)
        self.output_buffer = allocate(shape=y_shape, dtype=dtype)

    def _print_dt(self, timea, timeb, N):
        dt = (timeb - timea)
        dts = dt.seconds + dt.microseconds * 10 ** -6
        rate = N / dts
        print("Classified {} samples in {} seconds ({} inferences / s)".format(N, dts, rate))
        return dts, rate

    def predict(self, X, debug=False, profile=False, encode=None, decode=None):
        """
        Obtain the predictions of the NN implemented in the FPGA.
        Parameters:
        - X : the input vector. It should be a numpy ndarray.
        - dtype : the data type of the elements of the input/output vectors (set in the constructor).
          Note: it should be set depending on the interface of the accelerator; if it uses 'float'
          types for the 'data' AXI-Stream field, 'np.float32' is the correct dtype to use.
          If it uses 'ap_fixed<A,B>' instead, 'np.intA' is the correct one (note that A cannot be an
          arbitrary integer value; it can only assume values in {..., 8, 16, 32, ...}. Check the `numpy`
          docs for more info).
          In this case the encoding/decoding has to be computed by the PS. For example, for the
          'ap_fixed<16,6>' type the following two functions are the correct ones to use to encode/decode
          'float' -> 'ap_fixed<16,6>':
          ```
          def encode(xi):
              return np.int16(round(xi * 2**10))  # note 2**10 = 2**(A-B)
          def decode(yi):
              return yi * 2**-10
          encode_v = np.vectorize(encode)  # to apply them element-wise
          decode_v = np.vectorize(decode)
          ```
        - profile : boolean. Set it to `True` to print the performance of the algorithm in terms of `inferences/s`.
        - encode/decode : function pointers. See the `dtype` section for more information.
        - return : an output array based on `np.ndarray` with a shape equal to `y_shape` and a `dtype` equal to
          the namesake parameter.
        """
        if profile:
            timea = datetime.now()
        if encode is not None:
            X = encode(X)
        self.input_buffer[:] = X
        self.myproject_axi_0.write(self.regin, self.input_buffer.physical_address)
        self.myproject_axi_0.write(self.regout, self.output_buffer.physical_address)
        self.myproject_axi_0.write(self.ctrl.AP_START, 0x1)
        if debug:
            print("Config OK")
        while not self.ctrl.AP_DONE:
            if debug:
                print("Polling...")
        if debug:
            print("Done OK")
        # result = self.output_buffer.copy()
        if decode is not None:
            self.output_buffer = decode(self.output_buffer)

        if profile:
            timeb = datetime.now()
            dts, rate = self._print_dt(timea, timeb, len(X))
            return self.output_buffer, dts, rate
        else:
            return self.output_buffer
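A usage sketch for the new PYNQ driver above (registered under `python_drivers` -> `axi_master` in `supported_boards.json`). It assumes an accelerator with an `ap_fixed<16,6>` data interface; the bitstream name, shapes, and test data are placeholders, not values taken from the PR.

```python
import numpy as np

# float <-> ap_fixed<16,6> conversion done on the PS, as described in the docstring above
encode_v = np.vectorize(lambda xi: np.int16(round(xi * 2**10)))  # 2**10 = 2**(16-6)
decode_v = np.vectorize(lambda yi: yi * 2**-10)

# Placeholder bitstream name and shapes for a hypothetical model build.
X_test = np.random.rand(1024, 16).astype(np.float32)
nn = NeuralNetworkOverlay('design_1.bit', x_shape=X_test.shape, y_shape=(1024, 5), dtype=np.int16)
y, dts, rate = nn.predict(X_test, profile=True, encode=encode_v, decode=decode_v)
```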
@@ -0,0 +1,93 @@
set tcldir [file dirname [info script]]
source [file join $tcldir project.tcl]

# Project names
set design_name "design_1"
set hls_solution_name "solution1"
set ps_name "processing_system7_0"
set acc_name "${project_name}_axi_0"

# Board and chip part names
create_project ${project_name} ${project_name}_vivado_accelerator -part xc7z020clg400-1 -force
set_property board_part tul.com.tw:pynq-z2:part0:1.0 [current_project]

# Create block design
create_bd_design ${design_name}

# Setup IP repo
#set_property ip_repo_paths ${project_name}_prj [current_project]
set_property ip_repo_paths ${project_name}_prj/${hls_solution_name}/impl/ip [current_project]
update_ip_catalog

# Create and setup PS
create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 ${ps_name}
apply_bd_automation -rule xilinx.com:bd_rule:processing_system7 -config " \
make_external {FIXED_IO, DDR} \
apply_board_preset {1} \
Master {Disable} \
Slave {Disable} " [get_bd_cells ${ps_name}]
set_property -dict [list \
CONFIG.PCW_USE_S_AXI_GP0 {1} \
CONFIG.PCW_USE_FABRIC_INTERRUPT {1} \
CONFIG.PCW_IRQ_F2P_INTR {1}\
] [get_bd_cells ${ps_name}]

# Create accelerator
create_bd_cell -type ip -vlnv xilinx.com:hls:myproject_axi:1.0 ${acc_name}

# Wiring
apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config " \
Clk_master {Auto} \
Clk_slave {Auto} \
Clk_xbar {Auto} \
Master /${ps_name}/M_AXI_GP0 \
Slave /${acc_name}/s_axi_CTRL_BUS \
intc_ip {New AXI Interconnect} \
master_apm {0}" [get_bd_intf_pins ${acc_name}/s_axi_CTRL_BUS]

apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config " \
Clk_master {Auto} \
Clk_slave {Auto} \
Clk_xbar {Auto} \
Master /${acc_name}/m_axi_IN_BUS \
Slave /${ps_name}/S_AXI_GP0 \
intc_ip {Auto} \
master_apm {0}" [get_bd_intf_pins ${ps_name}/S_AXI_GP0]

apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config " \
Clk_master /${ps_name}/FCLK_CLK0 (100 MHz) \
Clk_slave /${ps_name}/FCLK_CLK0 (100 MHz) \
Clk_xbar /${ps_name}/FCLK_CLK0 (100 MHz) \
Master /${acc_name}/m_axi_OUT_BUS \
Slave /${ps_name}/S_AXI_GP0 \
intc_ip {/axi_smc} \
master_apm {0}" [get_bd_intf_pins ${acc_name}/m_axi_OUT_BUS]

# Wiring interrupt signal
connect_bd_net [get_bd_pins ${acc_name}/interrupt] [get_bd_pins ${ps_name}/IRQ_F2P]

# Top level wrapper
make_wrapper -files [get_files ./${project_name}_vivado_accelerator/${project_name}.srcs/sources_1/bd/${design_name}/${design_name}.bd] -top
add_files -norecurse ./${project_name}_vivado_accelerator/${project_name}.srcs/sources_1/bd/${design_name}/hdl/${design_name}_wrapper.v

# Memory mapping
delete_bd_objs [get_bd_addr_segs ${project_name}/Data_m_axi_IN_BUS/SEG_${ps_name}_GP0_QSPI_LINEAR]
delete_bd_objs [get_bd_addr_segs -excluded ${acc_name}/Data_m_axi_IN_BUS/SEG_${ps_name}_GP0_IOP]
delete_bd_objs [get_bd_addr_segs -excluded ${acc_name}/Data_m_axi_IN_BUS/SEG_${ps_name}_GP0_M_AXI_GP0]
delete_bd_objs [get_bd_addr_segs ${acc_name}/Data_m_axi_OUT_BUS/SEG_${ps_name}_GP0_QSPI_LINEAR]
delete_bd_objs [get_bd_addr_segs -excluded ${acc_name}/Data_m_axi_OUT_BUS/SEG_${ps_name}_GP0_IOP]
delete_bd_objs [get_bd_addr_segs -excluded ${acc_name}/Data_m_axi_OUT_BUS/SEG_${ps_name}_GP0_M_AXI_GP0]

# Run synthesis and implementation
reset_run impl_1
reset_run synth_1
launch_runs impl_1 -to_step write_bitstream -jobs 6
wait_on_run -timeout 360 impl_1

# Reporting
open_run impl_1
report_utilization -file util.rpt -hierarchical -hierarchical_percentages

# Export HDF file for SDK flow
file mkdir ./hdf
file copy -force ${project_name}_vivado_accelerator/${project_name}.runs/impl_1/${design_name}_wrapper.sysdef ./hdf/${design_name}_wrapper.hdf
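If this block-design script needs to be run outside the hls4ml build flow, a minimal sketch is shown below, assuming Vivado is on PATH, the exported HLS IP already exists, and the script sits next to the `project.tcl` it sources.

```python
import subprocess

# Run Vivado in batch mode on the generated block-design script.
# The script expects project.tcl (which defines project_name) in its own directory.
subprocess.run(['vivado', '-mode', 'batch', '-source', 'axi_master_design.tcl'], check=True)
```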
54 changes: 52 additions & 2 deletions hls4ml/writer/vivado_accelerator_writer.py
@@ -82,11 +82,17 @@ def write_axi_wrapper(self, model):

io_type = model.config.get_config_value("IOType")

model_brams = [var for var in model.get_weight_variables() if var.storage.lower() == 'bram']

for line in f.readlines():
if 'void myproject(' in line:
newline = 'void {}_axi(\n'.format(model.config.get_project_name())
elif '//hls-fpga-machine-learning insert include' in line:
newline = '#include "{}_axi.h"\n'.format(model.config.get_project_name())
for b in model_brams:
newline += '#include "weights/{}.h"\n'.format(b.name)
newline += '\n'
newline += '#include "nnet_utils/nnet_helpers.h"\n'
elif '//hls-fpga-machine-learning insert local vars' in line:
newline = ''
if self.vivado_accelerator_config.get_interface() == 'axi_stream':
@@ -102,8 +108,8 @@ def write_axi_wrapper(self, model):
newline += indent + '#pragma HLS STREAM variable=out_local depth={}\n'\
.format(model.get_output_variables()[0].pragma[1])
elif '//hls-fpga-machine-learning insert call' in line:
newline = indent + '{}(in_local, out_local);\n'.format(
model.config.get_project_name())
brams_str = ''.join([', ' + b.name for b in model_brams]) if len(model_brams) > 0 else ""
newline = indent + '{}(in_local, out_local{});\n'.format(model.config.get_project_name(), brams_str)
elif '//hls-fpga-machine-learning insert interface' in line:
if self.vivado_accelerator_config.get_interface() == 'axi_lite':
newline = ''
@@ -124,6 +130,16 @@ def write_axi_wrapper(self, model):
newline += indent + '#pragma HLS INTERFACE ap_ctrl_none port=return\n'
if model.config.get_config_value("IOType") == 'io_stream':
newline += indent + '#pragma HLS DATAFLOW\n'
elif '//hls-fpga-machine-learning insert load weights' in line:
newline = ''
for layer in model.get_layers():
for w in layer.get_weights():
if w.weight_class == 'CompressedWeightVariable':
newline += indent + ' nnet::load_compressed_weights_from_txt<{}, {}>({}, "{}.txt");\n'.format(w.type.name, w.nonzeros, w.name, w.name)
elif w.weight_class == 'ExponentWeightVariable':
newline += indent + ' nnet::load_exponent_weights_from_txt<{}, {}>({}, "{}.txt");\n'.format(w.type.name, w.data_length, w.name, w.name)
else:
newline += indent + ' nnet::load_weights_from_txt<{}, {}>({}, "{}.txt");\n'.format(w.type.name, w.data_length, w.name, w.name)
elif '//hls-fpga-machine-learning insert enqueue' in line:
io_type = model.config.get_config_value("IOType")
if io_type == 'io_parallel':
@@ -139,10 +155,12 @@ def write_axi_wrapper(self, model):
newline += indent + '}\n'
elif io_type == 'io_stream':
newline = ''
newline += 'LOAD_INPUT_OUTER_LOOP:\n'
newline += indent + 'for(unsigned i = 0; i < N_IN / {input_t}::size; ++i) {{\n'
# newline += indent + indent + '#pragma HLS PIPELINE\n'
newline += indent + indent + '{input_t} ctype;\n'
newline += indent + indent + '#pragma HLS DATA_PACK variable=ctype\n'
newline += 'LOAD_INPUT_INNER_LOOP:\n'
newline += indent + indent + 'for(unsigned j = 0; j < {input_t}::size; j++) {{\n'
# newline += indent + indent + indent + '#pragma HLS UNROLL\n'
if self.vivado_accelerator_config.get_interface() == 'axi_stream':
@@ -169,9 +187,11 @@ def write_axi_wrapper(self, model):
newline += indent + '}\n'
elif io_type == 'io_stream':
newline = ''
newline += 'STORE_OUTPUT_OUTER_LOOP:\n'
newline += indent + 'for(unsigned i = 0; i < N_OUT / {result_t}::size; ++i) {{\n'
# newline += indent + indent + '#pragma HLS PIPELINE\n'
newline += indent + indent + '{result_t} ctype = out_local.read();\n'
newline += 'STORE_OUTPUT_INNER_LOOP:\n'
newline += indent + indent + 'for(unsigned j = 0; j < {result_t}::size; j++) {{\n'
# newline += indent + indent + indent + '#pragma HLS UNROLL\n'
if self.vivado_accelerator_config.get_interface() == 'axi_stream':
@@ -188,6 +208,35 @@ def write_axi_wrapper(self, model):
f.close()
fout.close()

def modify_project_cpp(self, model):
'''
Rewrite the generated firmware/<project>.cpp: strip the AXIS/BRAM interface pragmas and the
weight-loading calls, which are now handled by the AXI wrapper.
'''
filedir = os.path.dirname(os.path.abspath(__file__))
oldfile = '{}/firmware/{}.cpp'.format(model.config.get_output_dir(), model.config.get_project_name())
newfile = '{}/build_prj_axi.tcl'.format(model.config.get_output_dir())
f = open(oldfile, 'r')
fout = open(newfile, 'w')

for line in f.readlines():
if '#pragma HLS INTERFACE axis port=' in line:
newline = ''
elif '#pragma HLS INTERFACE bram port=' in line:
newline = ''
elif 'nnet::load_weights_from_txt' in line:
newline = ''
elif 'nnet::load_exponent_weights_from_txt' in line:
newline = ''
elif 'nnet::load_compressed_weights_from_txt' in line:
newline = ''
else:
newline = line
fout.write(newline)

f.close()
fout.close()
os.rename(newfile, oldfile)

def modify_build_script(self, model):
'''
Modify the build_prj.tcl and build_lib.sh scripts to add the extra wrapper files and set the top function
@@ -369,6 +418,7 @@ def write_hls(self, model):
self.write_driver(model)
self.write_wrapper_test(model)
self.write_axi_wrapper(model)
self.modify_project_cpp(model)
self.modify_build_script(model)
self.write_new_tar(model)
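
The writer changes above follow hls4ml's usual template-marker approach: the writer streams a template file and swaps each `//hls-fpga-machine-learning insert ...` comment for generated code. A minimal standalone sketch of that pattern is shown below; the function and handler names are illustrative, not the actual hls4ml API.

```python
def fill_template(template_path, out_path, handlers):
    """Copy a template file, replacing every line that contains one of the
    '//hls-fpga-machine-learning insert ...' markers with the handler's output."""
    with open(template_path) as fin, open(out_path, 'w') as fout:
        for line in fin:
            for marker, handler in handlers.items():
                if marker in line:
                    fout.write(handler(line))  # handler builds the replacement text
                    break
            else:
                fout.write(line)               # pass context lines through unchanged

# e.g. handlers = {'//hls-fpga-machine-learning insert load weights':
#                  lambda line: '    // generated nnet::load_weights_from_txt calls\n'}
```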

4 changes: 2 additions & 2 deletions hls4ml/writer/vivado_writer.py
@@ -37,10 +37,10 @@ def print_array_to_cpp(self, var, odir, write_txt_file=True):

if write_txt_file:
h_file.write("#ifndef __SYNTHESIS__\n")
h_file.write(var.definition_cpp() + ";\n")
# 'static' is needed because the weights header is now also included by the AXI wrapper,
# so without it the arrays would be defined in more than one translation unit.
h_file.write("static " + var.definition_cpp() + ";\n")
h_file.write("#else\n")

h_file.write(var.definition_cpp() + " = {")
h_file.write("static " + var.definition_cpp() + " = {")

#fill c++ array.
#not including internal brackets for multidimensional case