天天看点

vivado HLS从实例看优化

虽然看完了ug902关于vivado HLS优化的内容,但有点囫囵吞枣,所以想从实例去探索如何应用优化指令来达到最大的性能!

一、第一个实例

#ifndef _XIANGANWO3_H_
#define _XIANGANWO3_H_
#include "hls_video.h"

// Maximum supported frame size. These values are the compile-time ROWS/COLS
// template arguments of the hls::Mat typedefs below.
#define MAX_WIDTH  1936
#define MAX_HEIGHT 1456

// Short aliases for the HLS video library types used by this design:
//   AXI_STREAM  - hls::stream of ap_axiu<32,1,1,1> elements
//   RGB_PIXEL   - 3-channel unsigned char scalar
//   GRAY_PIXEL  - 1-channel unsigned char scalar
//   RGB_IMAGE   - HLS_8UC3 hls::Mat bounded by MAX_HEIGHT x MAX_WIDTH
//   GRAY_IMAGE  - HLS_8UC1 hls::Mat bounded by MAX_HEIGHT x MAX_WIDTH
typedef hls::stream<ap_axiu<32,1,1,1> >               AXI_STREAM;
typedef hls::Scalar<3, unsigned char>                 RGB_PIXEL;
typedef hls::Scalar<1, unsigned char>                 GRAY_PIXEL;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC3>     RGB_IMAGE;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC1>     GRAY_IMAGE;

// Top-level function for hardware synthesis. It takes three input AXI streams,
// one output AXI stream, the frame size (rows, cols) and `model`, a table with
// 16777216 (2^24) unsigned char entries.
int hls_XiangAnWO3(AXI_STREAM& src_axi,AXI_STREAM& src_axi1,AXI_STREAM& src_axi2,AXI_STREAM& dst_axi, int rows, int cols,unsigned char  model[16777216]);

#endif
           
#include "XiangAnWO3.h"

/**
 * Builds a binary mask of the pixels whose colour is flagged in `model`.
 *
 * Each pixel of srcImage first goes through hls::Scale with factor 1.1. Its
 * three channels are then packed into a 24-bit index (R + G*256 + B*65536)
 * into `model`; a non-zero table entry produces 255 in the mask, anything else
 * produces 0. The mask is finally passed through hls::Dilate and hls::Erode.
 *
 * @param srcImage   input colour frame; channels 0/1/2 are read as B/G/R
 * @param FluoImage  output single-channel mask
 * @param model      lookup table with 2^24 entries indexed by the packed colour
 */
void FluoDetect(RGB_IMAGE& srcImage,GRAY_IMAGE& FluoImage,unsigned char  model[16777216])
{
	// Size the intermediate images from the actual input frame instead of
	// MAX_HEIGHT/MAX_WIDTH. The hls:: video functions iterate over the rows/cols
	// stored in their Mat arguments, so a frame smaller than the maximum would
	// otherwise leave some stream starved or holding leftover pixels.
	RGB_IMAGE  img(srcImage.rows, srcImage.cols);
	GRAY_IMAGE img1(srcImage.rows, srcImage.cols);
	GRAY_IMAGE img2(srcImage.rows, srcImage.cols);

#pragma HLS dataflow
	hls::Scale(srcImage,img,1.1);

	// Consume exactly as many pixels as `img` carries. The trip-count pragmas
	// only keep the latency report bounded now that the limits are run-time
	// values (1456 = MAX_HEIGHT, 1936 = MAX_WIDTH).
	loop_height: for (int i = 0; i < img.rows; i++) {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1456
	    loop_width: for (int j = 0; j < img.cols; j++) {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1936
#pragma HLS loop_flatten off
#pragma HLS pipeline II=1

	    	RGB_PIXEL src_data;
	    	GRAY_PIXEL dst_data(0);
	    	img>>src_data;

	    	unsigned char B = src_data.val[0];
	    	unsigned char G = src_data.val[1];
	    	unsigned char R = src_data.val[2];

	    	// Pack the colour into a 24-bit table index.
	    	int rgbpixels = R + G * 256 + B * 256 * 256;
	    	unsigned char rgbelement = model[rgbpixels];

	    	// Binarise the table entry: any non-zero value becomes 255.
	    	dst_data.val[0]= (rgbelement > 0)?   (unsigned char)255: 0;

	    	img1 << dst_data;
	    }
	}

	hls::Dilate(img1,img2);
	hls::Erode(img2,FluoImage);
}

/**
 * Extracts an edge image of the dark regions of a colour frame.
 *
 * Processing chain: hls::Scale (factor 1.5) -> BGR-to-gray conversion ->
 * inverted binary threshold at 38 (maximum value 255) -> three hls::Erode
 * passes -> hls::Sobel<1,0,3>.
 *
 * @param srcImage  input colour frame
 * @param dstImage  output single-channel image produced by the Sobel stage
 */
void FindTarget(RGB_IMAGE& srcImage,GRAY_IMAGE& dstImage)
{
	// Size every intermediate image from the actual input frame instead of
	// MAX_HEIGHT/MAX_WIDTH. Each hls:: stage iterates over the rows/cols stored
	// in its source Mat, so a stage fed by a MAX-sized Mat would try to read
	// more pixels than a smaller input frame ever produces.
	RGB_IMAGE   img(srcImage.rows, srcImage.cols);
	GRAY_IMAGE  img1(srcImage.rows, srcImage.cols);
	GRAY_IMAGE  img2(srcImage.rows, srcImage.cols);
	GRAY_IMAGE  img3(srcImage.rows, srcImage.cols);
	GRAY_IMAGE  img4(srcImage.rows, srcImage.cols);
	GRAY_IMAGE  img5(srcImage.rows, srcImage.cols);

#pragma HLS dataflow
	hls::Scale(srcImage,img,1.5);
	hls::CvtColor<HLS_BGR2GRAY>(img,img1);
	hls::Threshold(img1,img2,38,255,HLS_THRESH_BINARY_INV);

	hls::Erode(img2,img3);
	hls::Erode(img3,img4);
	hls::Erode(img4,img5);
	hls::Sobel<1,0,3>(img5,dstImage);

}

/**
 * Overlays two single-channel masks on a colour frame.
 *
 * For every pixel: if the first mask is 255 the output is (0,0,255); otherwise,
 * if the second mask is 255 the output is (255,255,255); otherwise the pixel of
 * srcImage3 is passed through unchanged.
 *
 * @param srcImage1  first mask (takes priority)
 * @param srcImage2  second mask
 * @param srcImage3  colour frame to draw on
 * @param dstImage   resulting colour frame
 */
void Composition(GRAY_IMAGE& srcImage1,GRAY_IMAGE& srcImage2,RGB_IMAGE& srcImage3,RGB_IMAGE& dstImage)
{
	// Iterate over the actual frame size carried by srcImage3 rather than
	// MAX_HEIGHT x MAX_WIDTH; reading past the real frame would block on empty
	// streams. The trip-count pragmas only keep the latency report bounded
	// (1456 = MAX_HEIGHT, 1936 = MAX_WIDTH).
	loop_height: for (int i = 0; i < srcImage3.rows; i++) {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1456
	loop_width: for (int j = 0; j < srcImage3.cols; j++) {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1936
#pragma HLS loop_flatten off
#pragma HLS pipeline II=1

				GRAY_PIXEL src_data1(0),src_data2(0);
				RGB_PIXEL dst_data(0,0,0);

				// One pixel is taken from each of the three inputs per iteration.
				srcImage3 >>dst_data;

				srcImage1>>src_data1;
				srcImage2>>src_data2;
				unsigned char data1=src_data1.val[0];
				unsigned char data2=src_data2.val[0];

				if(data1==255)
				{
					dst_data.val[0]=0;
					dst_data.val[1]=0;
					dst_data.val[2]=255;
				}
				else if(data2==255)
				{
					dst_data.val[0]=255;
					dst_data.val[1]=255;
					dst_data.val[2]=255;
				}

				dstImage << dst_data;
		}
	}
}

/**
 * Top-level function for hardware synthesis.
 *
 * Converts the three input AXI streams into hls::Mat frames, runs FluoDetect on
 * the first and FindTarget on the second, overlays both results on the third
 * with Composition, and streams the composed frame out on dst_axi.
 *
 * @param src_axi   input stream fed to FluoDetect
 * @param src_axi1  input stream fed to FindTarget
 * @param src_axi2  input stream used as the frame the overlays are drawn on
 * @param dst_axi   output stream carrying the composed frame
 * @param rows      frame height used to construct every hls::Mat
 * @param cols      frame width used to construct every hls::Mat
 * @param model     lookup table forwarded to FluoDetect
 * @return always 0
 */
int hls_XiangAnWO3(AXI_STREAM& src_axi, AXI_STREAM& src_axi1,AXI_STREAM& src_axi2,AXI_STREAM& dst_axi, int rows, int cols,unsigned char  model[16777216])
{
	// AXI4-Stream ports for the three inputs and the output.
#pragma HLS INTERFACE axis port=src_axi
#pragma HLS INTERFACE axis port=dst_axi
#pragma HLS INTERFACE axis port=src_axi1
#pragma HLS INTERFACE axis port=src_axi2

	// rows, cols and the return value share the CONTROL_BUS bundle.
#pragma HLS RESOURCE core=AXI_SLAVE variable=rows   metadata="-bus_bundle CONTROL_BUS"
#pragma HLS RESOURCE core=AXI_SLAVE variable=cols   metadata="-bus_bundle CONTROL_BUS"
#pragma HLS RESOURCE core=AXI_SLAVE variable=return metadata="-bus_bundle CONTROL_BUS"

#pragma HLS INTERFACE ap_stable port=rows
#pragma HLS INTERFACE ap_stable port=cols

	RGB_IMAGE   fluoInput(rows, cols);
	RGB_IMAGE   targetInput(rows, cols);
	GRAY_IMAGE  fluoMask(rows, cols);
	GRAY_IMAGE  targetEdges(rows, cols);
	RGB_IMAGE   baseFrame(rows, cols);
	RGB_IMAGE   composedFrame(rows, cols);

#pragma HLS dataflow
	// Stream -> Mat conversion of the three inputs.
	hls::AXIvideo2Mat(src_axi, fluoInput);
	hls::AXIvideo2Mat(src_axi1, targetInput);
	hls::AXIvideo2Mat(src_axi2, baseFrame);

	// The two detection branches.
	FluoDetect(fluoInput, fluoMask, model);
	FindTarget(targetInput, targetEdges);

	// Overlay both results on the base frame and stream it out.
	Composition(fluoMask, targetEdges, baseFrame, composedFrame);
	hls::Mat2AXIvideo(composedFrame, dst_axi);

	return 0;
}
           

性能report是:

vivado HLS从实例看优化

大概要花28.5ms!!!!

1、第一次优化:arbitrary precision C++ types(任意精度数据类型)------reduce resource area

#ifndef _XIANGANWO3_H_
#define _XIANGANWO3_H_

#include "hls_video.h"
#include <ap_int.h>
#include <fstream>

// Arbitrary-precision unsigned integers: 1 bit and 11 bits wide.
// 11 bits cover 0..2047, which is enough for MAX_WIDTH (1936) and MAX_HEIGHT (1456).
typedef ap_uint<1> uint1;
typedef ap_uint<11> uint11;

// Maximum supported frame size. These values are the compile-time ROWS/COLS
// template arguments of the hls::Mat typedefs below.
#define MAX_WIDTH  1936
#define MAX_HEIGHT 1456

// Short aliases for the HLS video library types used by this design:
//   AXI_STREAM  - hls::stream of ap_axiu<32,1,1,1> elements
//   RGB_PIXEL   - 3-channel unsigned char scalar
//   GRAY_PIXEL  - 1-channel unsigned char scalar
//   RGB_IMAGE   - HLS_8UC3 hls::Mat bounded by MAX_HEIGHT x MAX_WIDTH
//   GRAY_IMAGE  - HLS_8UC1 hls::Mat bounded by MAX_HEIGHT x MAX_WIDTH
typedef hls::stream<ap_axiu<32,1,1,1> >               AXI_STREAM;
typedef hls::Scalar<3, unsigned char>                 RGB_PIXEL;
typedef hls::Scalar<1, unsigned char>                 GRAY_PIXEL;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC3>     RGB_IMAGE;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC1>     GRAY_IMAGE;

// Top-level function for hardware synthesis. Compared with an int-based
// interface, the frame size, the return value and the entries of `model`
// use the narrow uint11/uint1 types declared above.
uint1 hls_XiangAnWO3(AXI_STREAM& src_axi,AXI_STREAM& src_axi1,AXI_STREAM& src_axi2,AXI_STREAM& dst_axi, uint11 rows, uint11 cols,uint1 model[16777216]);

#endif
           
#include "XiangAnWO3.h"

/**
 * Builds a binary mask of the pixels whose colour is flagged in `model`.
 *
 * Each pixel of srcImage first goes through hls::Scale with factor 1.1. Its
 * three channels are then packed into a 24-bit index (R + G*256 + B*65536)
 * into the 1-bit table `model`; the mask pixel is the table bit times 255.
 * The mask is finally passed through hls::Dilate and hls::Erode.
 *
 * @param srcImage   input colour frame; channels 0/1/2 are read as B/G/R
 * @param FluoImage  output single-channel mask
 * @param rows       frame height, must not exceed MAX_HEIGHT
 * @param cols       frame width, must not exceed MAX_WIDTH
 * @param model      1-bit lookup table with 2^24 entries
 */
void FluoDetect(RGB_IMAGE& srcImage,GRAY_IMAGE& FluoImage,uint11 rows,uint11 cols,uint1 model[16777216])
{
	// NOTE(review): UNROLL is documented as a loop directive; its effect at
	// function scope should be confirmed against the synthesis log.
#pragma HLS UNROLL

#pragma HLS ARRAY_RESHAPE variable=model block factor=64

	// Size the intermediate images with the real frame size, matching the
	// rows x cols loop below. The hls:: video functions iterate over the
	// rows/cols stored in their Mat arguments, so MAX-sized intermediates would
	// make hls::Dilate expect more pixels than the loop ever writes.
	RGB_IMAGE  img(rows, cols);
	GRAY_IMAGE img1(rows, cols);
	GRAY_IMAGE img2(rows, cols);

#pragma HLS dataflow
	hls::Scale(srcImage,img,1.1);

	// The assertions give the tool an upper bound for the variable loop limits.
	assert(rows<=MAX_HEIGHT);
	assert(cols<=MAX_WIDTH);
	loop_height: for (uint11 i = 0; i < rows; i++) {
	    loop_width: for (uint11 j = 0; j < cols; j++) {
#pragma HLS pipeline II=1
#pragma HLS DEPENDENCE variable=model inter false

	    	RGB_PIXEL src_data;
	    	GRAY_PIXEL dst_data(0);

	    	img>>src_data;

	    	unsigned char B = src_data.val[0];
	    	unsigned char G = src_data.val[1];
	    	unsigned char R = src_data.val[2];

	    	// Pack the colour into a 24-bit table index.
	    	int rgbpixels = R + G * 256 + B * 256 * 256;
	    	uint1 rgbelement = model[rgbpixels];

	    	// 1-bit table entry -> 0 or 255.
	    	dst_data.val[0]= rgbelement*255;

	    	img1 << dst_data;
	    }
	}

	hls::Dilate(img1,img2);
	hls::Erode(img2,FluoImage);
}

/**
 * Extracts an edge image of the dark regions of a colour frame.
 *
 * Processing chain: hls::Scale (factor 1.5) -> BGR-to-gray conversion ->
 * inverted binary threshold at 38 (maximum value 255) -> three hls::Erode
 * passes -> hls::Sobel<1,0,3>.
 *
 * @param srcImage  input colour frame
 * @param dstImage  output single-channel image produced by the Sobel stage
 */
void FindTarget(RGB_IMAGE& srcImage,GRAY_IMAGE& dstImage)
{
	// Size every intermediate image from the actual input frame instead of
	// MAX_HEIGHT/MAX_WIDTH. Each hls:: stage iterates over the rows/cols stored
	// in its source Mat, so a stage fed by a MAX-sized Mat would try to read
	// more pixels than a smaller input frame ever produces.
	RGB_IMAGE   img(srcImage.rows, srcImage.cols);
	GRAY_IMAGE  img1(srcImage.rows, srcImage.cols);
	GRAY_IMAGE  img2(srcImage.rows, srcImage.cols);
	GRAY_IMAGE  img3(srcImage.rows, srcImage.cols);
	GRAY_IMAGE  img4(srcImage.rows, srcImage.cols);
	GRAY_IMAGE  img5(srcImage.rows, srcImage.cols);

#pragma HLS dataflow
	hls::Scale(srcImage,img,1.5);
	hls::CvtColor<HLS_BGR2GRAY>(img,img1);
	hls::Threshold(img1,img2,38,255,HLS_THRESH_BINARY_INV);

	hls::Erode(img2,img3);
	hls::Erode(img3,img4);
	hls::Erode(img4,img5);
	hls::Sobel<1,0,3>(img5,dstImage);
}

/**
 * Overlays two single-channel masks on a colour frame.
 *
 * Per pixel: the first mask at 255 forces the output to (0,0,255); otherwise
 * the second mask at 255 forces it to (255,255,255); otherwise the pixel read
 * from srcImage3 is forwarded unchanged.
 *
 * @param srcImage1  first mask (takes priority)
 * @param srcImage2  second mask
 * @param srcImage3  colour frame to draw on
 * @param dstImage   resulting colour frame
 * @param rows       frame height, must not exceed MAX_HEIGHT
 * @param cols       frame width, must not exceed MAX_WIDTH
 */
void Composition(GRAY_IMAGE& srcImage1,GRAY_IMAGE& srcImage2,RGB_IMAGE& srcImage3,RGB_IMAGE& dstImage,uint11 rows,uint11 cols)
{
#pragma HLS UNROLL

	// Upper bounds for the variable loop limits.
	assert(rows<=MAX_HEIGHT);
	assert(cols<=MAX_WIDTH);

	loop_height: for (uint11 y = 0; y < rows; y++) {
	loop_width: for (uint11 x = 0; x < cols; x++) {
#pragma HLS pipeline II=1

			GRAY_PIXEL firstMaskPx(0), secondMaskPx(0);
			RGB_PIXEL outPx(0,0,0);

			// Same read order as before: base frame, first mask, second mask.
			srcImage3 >> outPx;
			srcImage1 >> firstMaskPx;
			srcImage2 >> secondMaskPx;

			const bool firstHit  = (firstMaskPx.val[0] == 255);
			const bool secondHit = (secondMaskPx.val[0] == 255);

			if (firstHit || secondHit)
			{
				// The first mask wins when both are set.
				const unsigned char lowChannels = firstHit ? 0 : 255;
				outPx.val[0] = lowChannels;
				outPx.val[1] = lowChannels;
				outPx.val[2] = 255;
			}

			dstImage << outPx;
		}
	}

}

/**
 * Top-level function for hardware synthesis.
 *
 * Converts the three input AXI streams into hls::Mat frames, runs FluoDetect on
 * the first and FindTarget on the second, overlays both results on the third
 * with Composition, and streams the composed frame out on dst_axi.
 *
 * @param src_axi   input stream fed to FluoDetect
 * @param src_axi1  input stream fed to FindTarget
 * @param src_axi2  input stream used as the frame the overlays are drawn on
 * @param dst_axi   output stream carrying the composed frame
 * @param rows      frame height used to construct every hls::Mat
 * @param cols      frame width used to construct every hls::Mat
 * @param model     1-bit lookup table forwarded to FluoDetect
 * @return always 0
 */
uint1 hls_XiangAnWO3(AXI_STREAM& src_axi, AXI_STREAM& src_axi1,AXI_STREAM& src_axi2,AXI_STREAM& dst_axi, uint11 rows, uint11 cols,uint1 model[16777216])
{
	// AXI4-Stream ports for the three inputs and the output.
#pragma HLS INTERFACE axis port=src_axi
#pragma HLS INTERFACE axis port=dst_axi
#pragma HLS INTERFACE axis port=src_axi1
#pragma HLS INTERFACE axis port=src_axi2

	// rows, cols and the return value share the CONTROL_BUS bundle.
#pragma HLS RESOURCE core=AXI_SLAVE variable=rows   metadata="-bus_bundle CONTROL_BUS"
#pragma HLS RESOURCE core=AXI_SLAVE variable=cols   metadata="-bus_bundle CONTROL_BUS"
#pragma HLS RESOURCE core=AXI_SLAVE variable=return metadata="-bus_bundle CONTROL_BUS"

#pragma HLS INTERFACE ap_stable port=rows
#pragma HLS INTERFACE ap_stable port=cols

	RGB_IMAGE   fluoInput(rows, cols);
	RGB_IMAGE   targetInput(rows, cols);
	GRAY_IMAGE  fluoMask(rows, cols);
	GRAY_IMAGE  targetEdges(rows, cols);
	RGB_IMAGE   baseFrame(rows, cols);
	RGB_IMAGE   composedFrame(rows, cols);

#pragma HLS dataflow
	// Stream -> Mat conversion of the three inputs.
	hls::AXIvideo2Mat(src_axi, fluoInput);
	hls::AXIvideo2Mat(src_axi1, targetInput);
	hls::AXIvideo2Mat(src_axi2, baseFrame);

	// The two detection branches.
	FluoDetect(fluoInput, fluoMask, rows, cols, model);
	FindTarget(targetInput, targetEdges);

	// Overlay both results on the base frame and stream it out.
	Composition(fluoMask, targetEdges, baseFrame, composedFrame, rows, cols);
	hls::Mat2AXIvideo(composedFrame, dst_axi);

	return (uint1)0;
}
           

综合报告为:

vivado HLS从实例看优化

虽然没能降低时延,但FF和LUT资源占用少了很多!分别少了100!

2、第二次优化:全局变量、loop bound优化------减少时延

/**
 * Builds a binary mask of the pixels whose colour is flagged in `model`.
 *
 * Each pixel of srcImage first goes through hls::Scale with factor 1.1. Its
 * three channels are then packed into a 24-bit index (R + G*256 + B*65536)
 * into the 1-bit table `model`; the mask pixel is the table bit times 255.
 * The mask is finally passed through hls::Dilate and hls::Erode.
 *
 * @param srcImage   input colour frame; channels 0/1/2 are read as B/G/R
 * @param FluoImage  output single-channel mask
 * @param rows       frame height, must not exceed MAX_HEIGHT
 * @param cols       frame width, must not exceed MAX_WIDTH
 * @param model      1-bit lookup table with 2^24 entries
 */
void FluoDetect(RGB_IMAGE& srcImage,GRAY_IMAGE& FluoImage,uint11 rows,uint11 cols,uint1 model[16777216])
{
	// NOTE(review): UNROLL is documented as a loop directive; its effect at
	// function scope should be confirmed against the synthesis log.
#pragma HLS UNROLL
#pragma HLS ARRAY_RESHAPE variable=model block factor=64

	// Size the intermediate images with the real frame size, matching the
	// rows x cols loop below. The hls:: video functions iterate over the
	// rows/cols stored in their Mat arguments, so MAX-sized intermediates would
	// make hls::Dilate expect more pixels than the loop ever writes.
	RGB_IMAGE  img(rows, cols);
	GRAY_IMAGE img1(rows, cols);
	GRAY_IMAGE img2(rows, cols);

#pragma HLS dataflow
	hls::Scale(srcImage,img,1.1);

	// The assertions give the tool an upper bound for the variable loop limits.
	assert(rows<=MAX_HEIGHT);
	assert(cols<=MAX_WIDTH);
	loop_height: for (uint11 i = 0; i < rows; i++) {
	    loop_width: for (uint11 j = 0; j < cols; j++) {
#pragma HLS pipeline II=1
#pragma HLS DEPENDENCE variable=model inter false

	    	RGB_PIXEL src_data;
	    	GRAY_PIXEL dst_data(0);

	    	img>>src_data;

	    	unsigned char B = src_data.val[0];
	    	unsigned char G = src_data.val[1];
	    	unsigned char R = src_data.val[2];

	    	// Pack the colour into a 24-bit table index.
	    	int rgbpixels = R + G * 256 + B * 256 * 256;
	    	uint1 rgbelement = model[rgbpixels];

	    	// 1-bit table entry -> 0 or 255.
	    	dst_data.val[0]= rgbelement*255;

	    	img1 << dst_data;
	    }
	}

	hls::Dilate(img1,img2);
	hls::Erode(img2,FluoImage);

}

/**
 * Overlays two single-channel masks on a colour frame.
 *
 * Per pixel: the first mask at 255 forces the output to (0,0,255); otherwise
 * the second mask at 255 forces it to (255,255,255); otherwise the pixel read
 * from srcImage3 is forwarded unchanged.
 *
 * @param srcImage1  first mask (takes priority)
 * @param srcImage2  second mask
 * @param srcImage3  colour frame to draw on
 * @param dstImage   resulting colour frame
 * @param rows       frame height, must not exceed MAX_HEIGHT
 * @param cols       frame width, must not exceed MAX_WIDTH
 */
void Composition(GRAY_IMAGE& srcImage1,GRAY_IMAGE& srcImage2,RGB_IMAGE& srcImage3,RGB_IMAGE& dstImage,uint11 rows,uint11 cols)
{
#pragma HLS UNROLL

	// Upper bounds for the variable loop limits.
	assert(rows<=MAX_HEIGHT);
	assert(cols<=MAX_WIDTH);
	loop_height: for (uint11 y = 0; y < rows; y++) {
	loop_width: for (uint11 x = 0; x < cols; x++) {
#pragma HLS pipeline II=1

			GRAY_PIXEL firstMaskPx(0), secondMaskPx(0);
			RGB_PIXEL outPx(0,0,0);

			// Same read order as before: base frame, first mask, second mask.
			srcImage3 >> outPx;
			srcImage1 >> firstMaskPx;
			srcImage2 >> secondMaskPx;

			const bool firstHit  = (firstMaskPx.val[0] == 255);
			const bool secondHit = (secondMaskPx.val[0] == 255);

			if (firstHit || secondHit)
			{
				// The first mask wins when both are set.
				const unsigned char lowChannels = firstHit ? 0 : 255;
				outPx.val[0] = lowChannels;
				outPx.val[1] = lowChannels;
				outPx.val[2] = 255;
			}

			dstImage << outPx;
		}
	}
}
           

综合后,查看报告,并没有减少时延!!!依旧是28.5ms!!

3、第3次优化:DATAFLOW-----减少时延

// Builds a binary mask from srcImage: every pixel is scaled by hls::Scale
// (factor 1.1), its B/G/R channels are packed into a 24-bit index into the
// 1-bit table `model`, and the table bit times 255 is written to the mask,
// which then goes through hls::Dilate and hls::Erode into FluoImage.
// In this variant the DATAFLOW pragma sits inside the pipelined inner loop.
void FluoDetect(RGB_IMAGE& srcImage,GRAY_IMAGE& FluoImage,uint11 rows,uint11 cols,uint1 model[16777216])
{
#pragma HLS UNROLL
#pragma HLS ARRAY_RESHAPE variable=model block factor=64

	// NOTE(review): the intermediates are constructed with the maximum size
	// while the loop below moves rows x cols pixels -- confirm the two always
	// match, since the hls:: calls presumably iterate over the size stored in
	// the Mat they read.
	RGB_IMAGE  img(MAX_HEIGHT, MAX_WIDTH);
	GRAY_IMAGE img1(MAX_HEIGHT, MAX_WIDTH);
	GRAY_IMAGE img2(MAX_HEIGHT, MAX_WIDTH);

	hls::Scale(srcImage,img,1.1);

	// Upper bounds for the variable loop limits.
	assert(rows<=MAX_HEIGHT);
	assert(cols<=MAX_WIDTH);
	loop_height: for (uint11 i = 0; i < rows; i++) {
	    loop_width: for (uint11 j = 0; j < cols; j++) {
#pragma HLS pipeline II=1
#pragma HLS DEPENDENCE variable=model inter false
// NOTE(review): DATAFLOW and PIPELINE are both requested for this loop body;
// verify in the synthesis log which of the two the tool actually applies.
#pragma HLS dataflow
	    	RGB_PIXEL src_data;
	    	GRAY_PIXEL dst_data(0);
	    	img>>src_data;

	    	unsigned char B = src_data.val[0];
	    	unsigned char G = src_data.val[1];
	    	unsigned char R = src_data.val[2];
	    	// Pack the colour into a 24-bit table index.
	    	int rgbpixels = R + G * 256 + B * 256 * 256;
	    	uint1 rgbelement = model[rgbpixels];

	    	// 1-bit table entry -> 0 or 255.
	    	dst_data.val[0]= rgbelement*255;
	    	img1 << dst_data;
	    }
	}
	hls::Dilate(img1,img2);
	hls::Erode(img2,FluoImage);
}

// Overlays two single-channel masks on the colour frame srcImage3 and writes
// the result to dstImage. Per pixel: srcImage1 == 255 gives (0,0,255);
// otherwise srcImage2 == 255 gives (255,255,255); otherwise the srcImage3
// pixel is forwarded unchanged.
// In this variant the DATAFLOW pragma sits inside the pipelined inner loop.
void Composition(GRAY_IMAGE& srcImage1,GRAY_IMAGE& srcImage2,RGB_IMAGE& srcImage3,RGB_IMAGE& dstImage,uint11 rows,uint11 cols)
{
#pragma HLS UNROLL

	// Upper bounds for the variable loop limits.
	assert(rows<=MAX_HEIGHT);
	assert(cols<=MAX_WIDTH);

	loop_height: for (uint11 i = 0; i < rows; i++) {
	loop_width: for (uint11 j = 0; j < cols; j++) {
#pragma HLS pipeline II=1
// NOTE(review): this loop body contains an if/else chain; confirm the tool
// accepts conditional execution inside a DATAFLOW region.
#pragma HLS dataflow
				GRAY_PIXEL src_data1(0),src_data2(0);
				RGB_PIXEL dst_data(0,0,0);

				// One pixel is taken from each of the three inputs per iteration.
				srcImage3 >>dst_data;
				srcImage1>>src_data1;
				srcImage2>>src_data2;
				unsigned char data1=src_data1.val[0];
				unsigned char data2=src_data2.val[0];

				// The first mask takes priority over the second one.
				if(data1==255)
				{
					dst_data.val[0]=0;
					dst_data.val[1]=0;
					dst_data.val[2]=255;
				}
				else if(data2==255)
				{
					dst_data.val[0]=255;
					dst_data.val[1]=255;
					dst_data.val[2]=255;
				}
				dstImage << dst_data;
		}
	}
}
           

其实我仿真时将一个地方不小心写错了参数,然后仿真报警告:

'hls::stream<unsigned char>.1' is read while empty, which may result in RTL simulation hanging.
           

这个是因为将hls::stream 变量或hls::Mat变量重复使用了!!众所周知,ug902中写过hls类型的变量只能使用一次作为输入参数!!!再次使用时它其实已经不在了(数据已经被读走),所以只能使用一次,否则就会报这个错!!!

修改后仿真时又报警告:

simulation :warning:Hls::stream 'hls::stream<unsigned char>.33' contains leftover data, which may result in RTL simulation hanging.
           

这个警告是什么原因,我还不知道!

我先没理警告继续综合会报错:

...dataflow...conditional execution on /opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:648:37 is not supported
           

这是因为dataflow优化中,不允许有if() 条件语句!否则无法综合!!!!

4、第4次优化:DATAFLOW---if branch----multi-access---减少时延

将刚刚不允许条件语句中DATAFLOW优化的部分改成了这样:

// Builds a binary mask from srcImage: every pixel is scaled by hls::Scale
// (factor 1.1), its B/G/R channels are packed into a 24-bit index into the
// 1-bit table `model`, and the table bit times 255 is written to the mask,
// which then goes through hls::Dilate and hls::Erode into FluoImage.
// The mask value is computed without any if statement.
void FluoDetect(RGB_IMAGE& srcImage,GRAY_IMAGE& FluoImage,uint11 rows,uint11 cols,uint1 model[16777216])
{
#pragma HLS UNROLL
#pragma HLS ARRAY_RESHAPE variable=model block factor=64

	// NOTE(review): the intermediates are constructed with the maximum size
	// while the loop below moves rows x cols pixels -- confirm the two always
	// match, since the hls:: calls presumably iterate over the size stored in
	// the Mat they read.
	RGB_IMAGE  img(MAX_HEIGHT, MAX_WIDTH);
	GRAY_IMAGE img1(MAX_HEIGHT, MAX_WIDTH);
	GRAY_IMAGE img2(MAX_HEIGHT, MAX_WIDTH);

	hls::Scale(srcImage,img,1.1);
	// Upper bounds for the variable loop limits.
	assert(rows<=MAX_HEIGHT);
	assert(cols<=MAX_WIDTH);

	loop_height: for (uint11 i = 0; i < rows; i++) {
	    loop_width: for (uint11 j = 0; j < cols; j++) {
#pragma HLS PIPELINE II=1
#pragma HLS DEPENDENCE variable=model inter false
// NOTE(review): DATAFLOW and PIPELINE are both requested for this loop body;
// verify in the synthesis log which of the two the tool actually applies.
#pragma HLS DATAFLOW

	    	RGB_PIXEL src_data;
	    	GRAY_PIXEL dst_data(0);
	    	img>>src_data;

	    	unsigned char B = src_data.val[0];
	    	unsigned char G = src_data.val[1];
	    	unsigned char R = src_data.val[2];
	    	// Pack the colour into a 24-bit table index.
	    	int rgbpixels = R + G * 256 + B * 256 * 256;
	    	// `model` is read once per pixel, inside the loop.
	    	uint1 rgbelement = model[rgbpixels];

	    	// 1-bit table entry -> 0 or 255.
	    	dst_data.val[0]= rgbelement*255;

	    	img1 << dst_data;
	    }
	}
	hls::Dilate(img1,img2);
	hls::Erode(img2,FluoImage);
}
           
// Overlays two single-channel masks on the colour frame srcImage3 and sends
// the result to the AXI stream dstImage. The per-pixel selection is written as
// arithmetic instead of the if/else chain kept in the comment at the end of
// the loop body.
void Composition(GRAY_IMAGE& srcImage1,GRAY_IMAGE& srcImage2,RGB_IMAGE& srcImage3,AXI_STREAM& dstImage,uint11 rows,uint11 cols)
{
#pragma HLS UNROLL

	// AXI_STREAM8 is not declared in the code shown here; it must come from
	// the project header.
	AXI_STREAM8 src1;
	AXI_STREAM8 src2;
	// src3 and src4 are default-constructed, i.e. no rows/cols are passed.
	RGB_IMAGE src3,src4;
	// Local copies of the inputs: both masks become AXI streams and the colour
	// frame is duplicated into src3 and src4.
	hls::Mat2AXIvideo(srcImage1, src1);
	hls::Mat2AXIvideo(srcImage2, src2);
	// NOTE(review): src4 is written here by hls::Duplicate and again by the
	// loop below (src4 << dst_data) -- confirm this double write is intended.
	hls::Duplicate(srcImage3, src3,src4);

	// Upper bounds for the variable loop limits.
	assert(rows<=MAX_HEIGHT);
	assert(cols<=MAX_WIDTH);
	loop_height: for (uint11 i = 0; i < rows; i++) {
	loop_width: for (uint11 j = 0; j < cols; j++) {
#pragma HLS DATAFLOW
#pragma HLS PIPELINE II=1

				RGB_PIXEL dst_data(0,0,0);
				src3 >>dst_data;
				ap_uint<8> data1=src1.read().data;
				ap_uint<8> data2=src2.read().data;

				// Branch-free form of the selection. For mask values restricted to
				// 0 or 255 it yields the same pixel as the commented-out if/else:
				//   data1 == 255              -> (0, 0, 255)
				//   data1 == 0, data2 == 255  -> (255, 255, 255)
				//   both 0                    -> pixel unchanged
				// NOTE(review): for any other mask value the result differs from the
				// if/else version -- confirm the masks are strictly binary.
				dst_data.val[0]=(1-data1/255)*data2+(!data1)*(!data2)*dst_data.val[0];
				dst_data.val[1]=(1-data1/255)*data2+(!data1)*(!data2)*dst_data.val[1];
				dst_data.val[2]=255*(data1 || data2)+(!data1)*(!data2)*dst_data.val[2];
				src4 << dst_data;

//				if(data1==255)
//				{
//					dst_data.val[0]=0;
//					dst_data.val[1]=0;
//					dst_data.val[2]=255;
//					src4 << dst_data;
//				}
//				else if(data2==255)
//				{
//					dst_data.val[0]=255;
//					dst_data.val[1]=255;
//					dst_data.val[2]=255;
//					src4 << dst_data;
//				}

		}
	}

	// Stream the composed frame out.
	hls::Mat2AXIvideo(src4, dstImage);
}
           

注释掉的就是之前报错的条件语句。DATAFLOW优化对条件语句真是苛刻,只要出现if() 那么这个作用域内这个优化就用不了。所以既然想用优化,那么就别用条件语句。另一个函数中的if我也改成了不用条件的形式。

同时,还改了对参数的多次访问,使用local cache!!!

综合后,报了新错:看错误信息,好像刚刚那个函数已经成功应用优化了,现在报错的是另一个函数:

INFO: [XFORM 203-721] Extract dataflow region from loop loop_width (Xiangan_wd/src/XiangAnWO3.cpp:82)  of function 'Composition'.
INFO: [XFORM 203-721] Extract dataflow region from loop loop_width (Xiangan_wd/src/XiangAnWO3.cpp:19)  of function 'FluoDetect'.
WARNING: [XFORM 203-713] Disabling dataflow in loop loop_width (Xiangan_wd/src/XiangAnWO3.cpp:19)  of function 'FluoDetect' .
WARNING: [XFORM 203-713] Disabling dataflow in loop loop_width (Xiangan_wd/src/XiangAnWO3.cpp:19)  of function 'FluoDetect' .
INFO: [XFORM 203-712] Store statement on variable  'tmp.3516' in a dataflow region ( 'dataflow_in_loop_loop_width' (/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:83:37)) is synthesized to a separate process, please move it inside another function for better QoR.
INFO: [XFORM 203-712] Store statement on variable  'tmp.3516' in a dataflow region ( 'dataflow_in_loop_loop_width' (/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:83:37)) is synthesized to a separate process, please move it inside another function for better QoR.
INFO: [XFORM 203-712] Store statement on variable  'tmp.3516' in a dataflow region ( 'dataflow_in_loop_loop_width' (/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:83:37)) is synthesized to a separate process, please move it inside another function for better QoR.
INFO: [XFORM 203-712] Store statement on variable  'tmp.312' in a dataflow region ( 'dataflow_in_loop_loop_width403' (/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:22:37)) is synthesized to a separate process, please move it inside another function for better QoR.
WARNING: [XFORM 203-713] Reading dataflow channel 'model.V' in the middle of dataflow may stall the dataflow pipeline:
WARNING: [XFORM 203-713] Argument 'model.V' has read operations in process function '__/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h_line648_proc' (/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:33:37).
WARNING: [XFORM 203-713] Reading dataflow channel 'model.V' (Xiangan_wd/src/XiangAnWO3.cpp:117) in the middle of dataflow may stall the dataflow pipeline:
WARNING: [XFORM 203-713] Argument 'model.V' has read operations in process function 'FluoDetect' (Xiangan_wd/src/XiangAnWO3.cpp:4).
           
WARNING: [XFORM 203-713] Reading dataflow channel 'model.V' in the middle of dataflow may stall the dataflow pipeline:
Reading dataflow channel 'model.V' in the middle of dataflow may stall the dataflow pipeline:
INFO: [XFORM 203-712] Store statement on variable  'tmp.3516' in a dataflow region ( 'dataflow_in_loop_loop_width' (/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:83:37)) is synthesized to a separate process, please move it inside another function for better QoR.
ERROR: [XFORM 203-801] Only one data field is allowed in AXI-Stream mode, however there are 3 data fields: srcImage3.data_stream[1].V srcImage3.data_stream[2].V dstImage.V.data.V
           

可以看到Extract dataflow region from ... of function 'Composition' 时没报错,但Extract dataflow region from...of function 'FluoDetect' 时报了一些错!我看了下那个函数,对参数model的multi-access 这个行为好像不好,因为ug902里说过不要对参数进行multi-access,如果要就用local cache来做!所以不管如何,我先修改这个问题。

5,第5次优化---

先看下 https://forums.xilinx.com/t5/Vivado/%E8%B7%9F-Xilinx-SAE-%E5%AD%A6-HLS-%E6%8C%81%E7%BB%AD%E6%9B%B4%E6%96%B0-%E4%B8%AD%E6%96%87%E8%AE%B2%E8%A7%A3/m-p/708179  这个是HLS 优化视频。

http://www.openhw.org/module/forum/forum.php?mod=viewthread&tid=595792&highlight=HLS%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0 

http://www.openhw.org/module/forum/forum.php?mod=viewthread&tid=595819&highlight=HLS%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0

http://www.openhw.org/module/forum/forum.php?mod=viewthread&tid=595929&highlight=HLS%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0

http://www.openhw.org/module/forum/forum.php?mod=viewthread&tid=658891&highlight=Vivado%2BHLS

http://www.openhw.org/module/forum/forum.php?mod=viewthread&tid=658879&highlight=Vivado%2BHLS

http://www.openhw.org/module/forum/forum.php?mod=viewthread&tid=659217&highlight=Vivado%2BHLS

这几个都是讲HLS优化的实例,非常非常有用!!!!!!!而且写得非常非常好!!!!!!!

我看完了,但还没实践,抽时间将这几个网址的实例实践感受下。