天天看點

DM8148 開發記錄 四 opencv 應用c6accel

ezsdk中有opencv的加速,直接測試之

http://software-dl.ti.com/dsps/dsps_public_sw/c6000/web/c6accel/latest/index_FDS.html 下載位址

http://software-dl.ti.com/dsps/dsps_public_sw/ezsdk/index.html

For C6Accel 2.01.00.11 and later

C6Accel 2.01.00.11 integrates OpenCV functionality on the DSP along with other libraries. This release adds a test application and a new build target 'opencv_app' to build it in the package. There is a prerequisite for building opencv_app: users must build OpenCV 2.x for the ARM as described here and place the OpenCV shared libraries on the target filesystem under the path $TARGETFS/usr/lib

After the prerequisite step is complete, execute make to build and install the OpenCV test application:

make opencv_app
make opencv_app_install
      

 從以上的文字可以看出,需要先交叉編譯 OpenCV,因為這個工具會比較 ARM 和 DSP 的運算速度,所以要交叉編譯 OpenCV。其實這一步貌似不是很有必要,對於有些環節,把 OpenCV 移植到 A8 都有困難。

我本人也在 DM3730 上把 OpenCV 1.0 完整移植到 codec engine 中,改天把思路和代碼一起貼出來。

C6Accel 做的工作很方便,不過如果要做優化,也要改動一部分代碼,而源碼貌似都是打包好的。

下面是dm8148的arm 和dsp 做opencv的耗時比較 的源碼

/*================================================================================*/

/*   Copyright (c) 2010, Texas Instruments Incorporated                           */

/*   All rights reserved.                                                         */

/*                                                                                */

/*   Name: C6Accel_testfxns.c                                                     */

/*                                                                                */

/*   Descriptions:                                                                */

/*   File contains code to test kernels in the C6Accel codec                      */

/*                                                                                */

/*      Version: 0.0.1                                                            */

/*================================================================================*/



/* Selects the new frame-based APIs (i.e. the ones taking row and col
 * parameters). They are optimised for C6Accel because they request one
 * operation covering all rows rather than a separate operation per row. */
#define USE_NEW_FRAME_APIS



/*XDC and codec engine includes*/

#include <xdc/std.h>

#include <ti/sdo/ce/osal/Memory.h>



/* Run Time lib include files: */

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <stdarg.h>

#include <math.h>

//#include "precomp.hpp"

#include <ti/sdo/linuxutils/cmem/src/interface/cmem.h>



/* Declare MACROS that will be used for benchmarking*/

#include "benchmark.h"



/* Include C6ACCEL headers*/

#include "../../c6accelw/c6accelw.h"

#include "../../c6accelw/c6accelw_opencv.h"



// extra headers for OpenCV

/*#include <time.h>

#include <sys/types.h>

#include <sys/time.h>

#include <sys/stat.h>*/

#include "opencv/highgui.h"



/* Scalar constants built with cvScalar(); only the first component is
 * non-zero (100 for CVX_GRAY50, 255 for CVX_WHITE). */
#define CVX_GRAY50 cvScalar(100,0,0,0)
#define CVX_WHITE  cvScalar(255,0,0,0)



/* Create default heap memory configuration for test functions */

/* Positional initialiser: the four values must stay in the field order of
 * Memory_AllocParams (declared in the Codec Engine Memory.h header).
 * NOTE(review): presumably type / cache flags / alignment / segment --
 * confirm against Memory.h. */
static Memory_AllocParams testfxnsMemParams =
{
    Memory_CONTIGHEAP,
    Memory_CACHED,
    Memory_DEFAULTALIGNMENT,
    0
};

/* CMEM allocation parameters defined in another translation unit. */
extern CMEM_AllocParams cvCmemParams;// = {CMEM_HEAP, CMEM_CACHED, 8};





/* Tests for OpenCV kernels */
/*
 * Each test function below runs an OpenCV call on the ARM and its C6accel_
 * counterpart, prints the average time of both, and compares the outputs.
 */

/*
 * Helper: measure the cost of one back-to-back pair of gettimeofday() calls.
 *
 * Returns the time elapsed between the two calls, in microseconds. The
 * seconds field is part of the difference so that the result does not drop
 * to roughly -1000000 when the second call lands just after a one-second
 * boundary (tv_usec restarts from 0 there).
 */
static int get_overhead_time(void)
{
    struct timeval startTime, endTime;

    gettimeofday(&startTime, NULL);
    gettimeofday(&endTime, NULL);

    return (int)((endTime.tv_sec - startTime.tv_sec) * 1000000
                 + (endTime.tv_usec - startTime.tv_usec));
}



/*
 * Time cvSobel and cvConvertScale on the ARM against C6accel_cvSobel and
 * C6accel_cvConvertScale, then compare and save the two scaled results.
 *
 * hC6accel        - C6Accel handle passed to the C6accel_* calls
 * input_file_name - image file, loaded as grayscale
 * n               - number of timed iterations per stage
 *
 * Returns 1 when the test ran to completion, 0 when the input image could
 * not be loaded or does not have 8-bit depth.
 */
Int c6accel_test_cvSobel(C6accel_Handle hC6accel, char *input_file_name, int n)
{
    IplImage *inputImg, *outputImg_arm, *outputImg_dsp, *scaleImg_arm, *scaleImg_dsp;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;

    printf("cvSobel Test (%s, %i iterations)\n", input_file_name, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input image from file; cvLoadImage returns NULL on failure
    inputImg = cvLoadImage(input_file_name, CV_LOAD_IMAGE_GRAYSCALE);
    if (inputImg == NULL)
    {
        printf("C6accel_cvSobel test failed; could not load input image %s.\n", input_file_name);
        return 0;
    }

    // 2. Check image depth; require 8-bit
    if (inputImg->depth != IPL_DEPTH_8U && inputImg->depth != IPL_DEPTH_8S)
    {
        printf("C6accel_cvSobel test failed; input image must have 8-bit depth.\n");
        cvReleaseImage(&inputImg); // do not leak the loaded image on the failure path
        return 0;
    }

    // 3. Allocate output images (must have 16- and 8-bit depth; output MUST be 16S)
    outputImg_arm = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_16S, 1);
    outputImg_dsp = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_16S, 1);
    scaleImg_arm = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_8U, 1);
    scaleImg_dsp = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_8U, 1);

    // 4.a Apply ARM algorithm
    cvSobel(inputImg, outputImg_arm, 1, 1, 3); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvSobel(inputImg, outputImg_arm, 1, 1, 3);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM Sobel function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4.b Apply DSP algorithm
    C6accel_cvSobel(hC6accel, inputImg, outputImg_dsp, 1, 1, 3); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvSobel(hC6accel, inputImg, outputImg_dsp, 1, 1, 3);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP Sobel function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 5.a Apply scale conversion on ARM
    cvConvertScale(outputImg_arm, scaleImg_arm, 0.5, 128); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvConvertScale(outputImg_arm, scaleImg_arm, 0.5, 128);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM ConvertScale function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 5.b Apply scale conversion on DSP
    C6accel_cvConvertScale(hC6accel, outputImg_dsp, scaleImg_dsp, 0.5, 128); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvConvertScale(hC6accel, outputImg_dsp, scaleImg_dsp, 0.5, 128);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP ConvertScale function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 6. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(scaleImg_arm, scaleImg_dsp, CV_L2, NULL));

    // 7. Save outputs to filesystem
    cvSaveImage("./output_arm.png", scaleImg_arm, 0);
    cvSaveImage("./output_dsp.png", scaleImg_dsp, 0);

    // Freeing memory
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);
    cvReleaseImage(&scaleImg_arm);
    cvReleaseImage(&scaleImg_dsp);
    cvReleaseImage(&inputImg);

    printf("C6accel_cvSobel test completed successfully; outputs saved to filesystem\n");
    return 1;
}



/*
 * Time cvFlip and cvCopy on the ARM against C6accel_cvFlip and
 * C6accel_cvCopy, then compare and save the two copied results.
 *
 * hC6accel        - C6Accel handle passed to the C6accel_* calls
 * input_file_name - image file, loaded as colour
 * n               - number of timed iterations per stage
 *
 * Returns 1 when the test ran to completion, 0 when the input image could
 * not be loaded.
 */
Int c6accel_test_cvFlip(C6accel_Handle hC6accel, char *input_file_name, int n)
{
    IplImage *inputImg, *outputImg_arm, *outputImg_dsp, *copyImg_arm, *copyImg_dsp;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;

    printf("cvFlip Test (%s, %i iterations)\n", input_file_name, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input image from file; cvLoadImage returns NULL on failure
    inputImg = cvLoadImage(input_file_name, CV_LOAD_IMAGE_COLOR);
    if (inputImg == NULL)
    {
        printf("C6accel_cvFlip test failed; could not load input image %s.\n", input_file_name);
        return 0;
    }

    // 2. Allocate output images (must have same depth, channels as input)
    outputImg_arm = cvCreateImage(cvSize(inputImg->width, inputImg->height), inputImg->depth, inputImg->nChannels);
    outputImg_dsp = cvCreateImage(cvSize(inputImg->width, inputImg->height), inputImg->depth, inputImg->nChannels);
    copyImg_arm = cvCreateImage(cvSize(inputImg->width, inputImg->height), inputImg->depth, inputImg->nChannels);
    copyImg_dsp = cvCreateImage(cvSize(inputImg->width, inputImg->height), inputImg->depth, inputImg->nChannels);

    // 3.a Apply ARM algorithm
    cvFlip(inputImg, outputImg_arm, -1); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvFlip(inputImg, outputImg_arm, -1);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM Flip function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 3.b Apply DSP algorithm
    C6accel_cvFlip(hC6accel, inputImg, outputImg_dsp, -1); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvFlip(hC6accel, inputImg, outputImg_dsp, -1);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP Flip function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4.a Copy image on ARM
    cvCopy(outputImg_arm, copyImg_arm, NULL); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvCopy(outputImg_arm, copyImg_arm, NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM Copy function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4.b Copy image on DSP
    C6accel_cvCopy(hC6accel, outputImg_dsp, copyImg_dsp, NULL); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvCopy(hC6accel, outputImg_dsp, copyImg_dsp, NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP Copy function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 5. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(copyImg_arm, copyImg_dsp, CV_L2, NULL));

    // 6. Save outputs to filesystem
    cvSaveImage("./output_arm.png", copyImg_arm, 0);
    cvSaveImage("./output_dsp.png", copyImg_dsp, 0);

    // OpenCV way of freeing memory
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);
    cvReleaseImage(&copyImg_arm);
    cvReleaseImage(&copyImg_dsp);
    cvReleaseImage(&inputImg);

    printf("C6accel_cvFlip test completed successfully; outputs saved to filesystem\n");
    return 1;
}



/*
 * Time cvSetZero, cvRectangle and cvCircle on the ARM against their
 * C6accel_* counterparts, then compare and save the two drawn images.
 *
 * The input file is loaded twice (one image per processor); both images are
 * zeroed before drawing, so the file effectively supplies the image size
 * and format. For the drawing stages the untimed warm-up call uses
 * thickness 1 while the timed calls use thickness -1.
 *
 * hC6accel        - C6Accel handle passed to the C6accel_* calls
 * input_file_name - image file, loaded as colour
 * n               - number of timed iterations per stage
 *
 * Returns 1 when the test ran to completion, 0 when the input image could
 * not be loaded.
 */
Int c6accel_test_cvCircle(C6accel_Handle hC6accel, char *input_file_name, int n)
{
    IplImage *armImg, *dspImg;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, radius, i;
    CvScalar orange = { 0, 128, 255, 255 }, blue = {255, 0, 0, 255}; // BGRA
    CvPoint center, pt1, pt2;
    float t_avg;

    printf("cvCircle Test (%s, %i iterations)\n", input_file_name, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input image from file; cvLoadImage returns NULL on failure
    armImg = cvLoadImage(input_file_name, CV_LOAD_IMAGE_COLOR);
    dspImg = cvLoadImage(input_file_name, CV_LOAD_IMAGE_COLOR);
    if (armImg == NULL || dspImg == NULL)
    {
        printf("C6accel_cvCircle test failed; could not load input image %s.\n", input_file_name);
        // release whichever of the two loads did succeed
        if (armImg != NULL)
        {
            cvReleaseImage(&armImg);
        }
        if (dspImg != NULL)
        {
            cvReleaseImage(&dspImg);
        }
        return 0;
    }

    // 2. Compute circle and rectangle parameters (center, radius, pt1, pt2)
    center.x = armImg->width / 2;
    center.y = armImg->height / 2;
    radius = (center.x >= center.y) ? center.y : center.x;
    pt1.x = pt1.y = 0;
    pt2.x = center.x;
    pt2.y = armImg->height;

    // 3.a Zero out ARM image
    cvSetZero(armImg); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvSetZero(armImg);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM SetZero function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 3.b Zero out DSP image
    C6accel_cvSetZero(hC6accel, dspImg); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvSetZero(hC6accel, dspImg);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP SetZero function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4.a Draw rectangle on ARM image
    cvRectangle(armImg, pt1, pt2, blue, 1, 8, 0); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvRectangle(armImg, pt1, pt2, blue, -1, 8, 0);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM Rectangle function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4.b Draw rectangle on DSP image
    C6accel_cvRectangle(hC6accel, dspImg, pt1, pt2, blue, 1, 8, 0); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvRectangle(hC6accel, dspImg, pt1, pt2, blue, -1, 8, 0);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP Rectangle function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 5.a Draw circle on ARM image
    cvCircle(armImg, center, radius, orange, 1, 8, 0); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvCircle(armImg, center, radius, orange, -1, 8, 0);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM Circle function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 5.b Draw circle on DSP image
    C6accel_cvCircle(hC6accel, dspImg, center, radius, orange, 1, 8, 0); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvCircle(hC6accel, dspImg, center, radius, orange, -1, 8, 0);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP Circle function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 6. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(armImg, dspImg, CV_L2, NULL));

    // 7. Save outputs to filesystem
    cvSaveImage("./output_arm.png", armImg, 0);
    cvSaveImage("./output_dsp.png", dspImg, 0);

    // Free memory
    cvReleaseImage(&armImg);
    cvReleaseImage(&dspImg);

    printf("C6accel_cvCircle test completed successfully; outputs saved to filesystem\n");
    return 1;
}



/*
 * Time cvCvtColor (BGR to YCrCb) and cvResize (to half size, linear
 * interpolation) on the ARM against C6accel_cvCvtColor and
 * C6accel_cvResize, then compare and save the two resized results.
 *
 * hC6accel        - C6Accel handle passed to the C6accel_* calls
 * input_file_name - image file, loaded as colour
 * n               - number of timed iterations per stage
 *
 * Returns 1 when the test ran to completion, 0 when the input image could
 * not be loaded.
 */
Int c6accel_test_cvResize(C6accel_Handle hC6accel, char *input_file_name, int n)
{
    IplImage *inputImg, *colorImg_arm, *colorImg_dsp, *resizeImg_arm, *resizeImg_dsp;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;

    printf("cvResize Test (%s, %i iterations)\n", input_file_name, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input image from file; cvLoadImage returns NULL on failure
    inputImg = cvLoadImage(input_file_name, CV_LOAD_IMAGE_COLOR);
    if (inputImg == NULL)
    {
        printf("C6accel_cvResize test failed; could not load input image %s.\n", input_file_name);
        return 0;
    }

    // 2. Allocate recolor images (must have same depth, channels, size as input)
    //    and resize images (must have same depth, channels as input with half size)
    colorImg_arm = cvCreateImage(cvSize(inputImg->width, inputImg->height), inputImg->depth, inputImg->nChannels);
    colorImg_dsp = cvCreateImage(cvSize(inputImg->width, inputImg->height), inputImg->depth, inputImg->nChannels);
    resizeImg_arm = cvCreateImage(cvSize(inputImg->width / 2, inputImg->height / 2), inputImg->depth, inputImg->nChannels);
    resizeImg_dsp = cvCreateImage(cvSize(inputImg->width / 2, inputImg->height / 2), inputImg->depth, inputImg->nChannels);

    // 3.a Apply ARM algorithm
    cvCvtColor(inputImg, colorImg_arm, CV_BGR2YCrCb); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvCvtColor(inputImg, colorImg_arm, CV_BGR2YCrCb);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM CvtColor function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 3.b Apply DSP algorithm
    C6accel_cvCvtColor(hC6accel, inputImg, colorImg_dsp, CV_BGR2YCrCb); // run once before timing
    gettimeofday(&startTime, NULL);
    // Timed over n iterations like every other stage: the elapsed time is
    // divided by n and reported as "n times", so a single call here would
    // understate the DSP average by a factor of n.
    for (i = 0; i < n; i++)
    {
        C6accel_cvCvtColor(hC6accel, inputImg, colorImg_dsp, CV_BGR2YCrCb);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP CvtColor function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4.a Resize image on ARM
    cvResize(colorImg_arm, resizeImg_arm, CV_INTER_LINEAR); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvResize(colorImg_arm, resizeImg_arm, CV_INTER_LINEAR);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM Resize function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4.b Resize image on DSP
    C6accel_cvResize(hC6accel, colorImg_dsp, resizeImg_dsp, CV_INTER_LINEAR); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvResize(hC6accel, colorImg_dsp, resizeImg_dsp, CV_INTER_LINEAR);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP Resize function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 5. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(resizeImg_arm, resizeImg_dsp, CV_L2, NULL));

    // 6. Save outputs to filesystem
    cvSaveImage("./output_arm.png", resizeImg_arm, 0);
    cvSaveImage("./output_dsp.png", resizeImg_dsp, 0);

    // Free memory
    cvReleaseImage(&resizeImg_arm);
    cvReleaseImage(&resizeImg_dsp);
    cvReleaseImage(&colorImg_arm);
    cvReleaseImage(&colorImg_dsp);
    cvReleaseImage(&inputImg);

    printf("C6accel_cvResize test completed successfully; outputs saved to filesystem\n");
    return 1;
}



/*
 * Time cvEqualizeHist on the ARM against C6accel_cvEqualizeHist, then
 * compare and save the two equalised results.
 *
 * hC6accel        - C6Accel handle passed to the C6accel_* call
 * input_file_name - image file, loaded as grayscale
 * n               - number of timed iterations per stage
 *
 * Returns 1 when the test ran to completion, 0 when the input image could
 * not be loaded or does not have 8-bit depth.
 */
Int c6accel_test_cvEqualizeHist(C6accel_Handle hC6accel, char *input_file_name, int n)
{
    IplImage *inputImg, *outputImg_arm, *outputImg_dsp;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;

    printf("cvEqualizeHist Test (%s, %i iterations)\n", input_file_name, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input image from file; cvLoadImage returns NULL on failure
    inputImg = cvLoadImage(input_file_name, CV_LOAD_IMAGE_GRAYSCALE);
    if (inputImg == NULL)
    {
        printf("C6accel_cvEqualizeHist test failed; could not load input image %s.\n", input_file_name);
        return 0;
    }

    // 2. Check image depth; require 8-bit
    if (inputImg->depth != IPL_DEPTH_8U && inputImg->depth != IPL_DEPTH_8S)
    {
        printf("C6accel_cvEqualizeHist test failed; input image must have 8-bit depth.\n");
        cvReleaseImage(&inputImg); // do not leak the loaded image on the failure path
        return 0;
    }

    // 3. Allocate output images
    outputImg_arm = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_8U, 1);
    outputImg_dsp = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_8U, 1);

    // 4.a Apply ARM algorithm
    cvEqualizeHist(inputImg, outputImg_arm); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvEqualizeHist(inputImg, outputImg_arm);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM EqualizeHist function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4.b Apply DSP algorithm
    C6accel_cvEqualizeHist(hC6accel, inputImg, outputImg_dsp); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvEqualizeHist(hC6accel, inputImg, outputImg_dsp);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP EqualizeHist function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 5. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

    // 6. Save outputs to filesystem
    cvSaveImage("./output_arm.png", outputImg_arm, 0);
    cvSaveImage("./output_dsp.png", outputImg_dsp, 0);

    // Free memory
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);
    cvReleaseImage(&inputImg);

    printf("C6accel_cvEqualizeHist test completed successfully; outputs saved to filesystem\n");
    return 1;
}



   // TEMP: function to traverse classifier cascade

    /*
     * Local declarations of the "hidden" Haar cascade structures so that the
     * functions below can walk a cascade and rewrite the pointers stored in it.
     * NOTE(review): presumably these must match, field for field, the private
     * layout used by the OpenCV build in use -- confirm before changing
     * anything here.
     */

    /* Element type behind the p0..p3 pointers. */
    typedef int sumtype;
    /* Element type behind the pq0..pq3 pointers. */
    typedef double sqsumtype;

    /* One feature: up to CV_HAAR_FEATURE_MAX rectangles, each holding four
     * pointers and a weight. */
    typedef struct CvHidHaarFeature
    {
        struct
        {
            sumtype *p0, *p1, *p2, *p3;
            float weight;
        }
        rect[CV_HAAR_FEATURE_MAX];
    }
    CvHidHaarFeature;

    /* One tree node: a feature plus a threshold and left/right values. */
    typedef struct CvHidHaarTreeNode
    {
        CvHidHaarFeature feature;
        float threshold;
        int left;
        int right;
    }
    CvHidHaarTreeNode;

    /* One classifier; `node` is indexed 0..count-1 by the traversal code. */
    typedef struct CvHidHaarClassifier
    {
        int count;
        //CvHaarFeature* orig_feature;
        CvHidHaarTreeNode* node;
        float* alpha;
    }
    CvHidHaarClassifier;

    /* One stage; `classifier` is indexed 0..count-1 by the traversal code.
     * next/child/parent point at other stages. */
    typedef struct CvHidHaarStageClassifier
    {
        int  count;
        float threshold;
        CvHidHaarClassifier* classifier;
        int two_rects;
        
        struct CvHidHaarStageClassifier* next;
        struct CvHidHaarStageClassifier* child;
        struct CvHidHaarStageClassifier* parent;
    }
    CvHidHaarStageClassifier;

    /* The whole cascade; `stage_classifier` and (when non-NULL) `ipp_stages`
     * are indexed 0..count-1 by the traversal code. No typedef is given here;
     * the CvHidHaarClassifierCascade type name used below must come from an
     * included header. */
    struct CvHidHaarClassifierCascade
    {
        int  count;
        int  is_stump_based;
        int  has_tilted_features;
        int  is_tree;
        double inv_window_area;
        CvMat sum, sqsum, tilted;
        CvHidHaarStageClassifier* stage_classifier;
        sqsumtype *pq0, *pq1, *pq2, *pq3;
        sumtype *p0, *p1, *p2, *p3;

        void** ipp_stages;
    };



    

     /*
      * Translate one pointer with CMEM_getPhys(); a NULL pointer stays NULL.
      */
     static void *LOCAL_hid_ptr_to_phys(void *virt)
     {
         return virt ? (void *)CMEM_getPhys(virt) : NULL;
     }

     /*
      * Walk a hidden Haar cascade and overwrite, in place, every pointer
      * stored in it with the value CMEM_getPhys() returns for that pointer,
      * logging each step to fp as a "//"-prefixed line.
      *
      * cascade - cascade to translate; NULL is logged and otherwise ignored
      * fp      - stream that receives the log lines
      *
      * Order matters: a container's children are visited through its
      * still-untranslated pointer, and only afterwards is that pointer itself
      * translated. After this call the pointers inside the cascade are no
      * longer usable for traversal by this code.
      *
      * Pointers are printed through (unsigned int) casts with %08X, which
      * assumes 32-bit pointers.
      */
     void traverse_and_translate_hid_cascade(CvHidHaarClassifierCascade *cascade, FILE *fp)
     {
         CvHidHaarStageClassifier *hid_stage;
         CvHidHaarClassifier *hid_classifier;
         CvHidHaarTreeNode *hid_node;
         int stage_count, classifier_count, feature_count;
         int i, j, k, l;

         if (cascade == NULL)
         {
             fprintf(fp, "//\tHidden cascade pointer is NULL; no traversal required\n");
             return;
         }

         stage_count = cascade->count;
         fprintf(fp, "//\tHidden cascade at 0x%08X has %i stages\n",
                 (unsigned int)cascade,
                 stage_count);

         for (i = 0; i < cascade->count; i++)
         {
             hid_stage = cascade->stage_classifier + i;
             classifier_count = hid_stage->count;
             fprintf(fp, "//\t\tHidden stage %i at 0x%08X has %i classifiers\n",
                     i, (unsigned int)hid_stage, classifier_count);

             for (j = 0; j < classifier_count; j++)
             {
                 hid_classifier = hid_stage->classifier + j;
                 feature_count = hid_classifier->count;
                 fprintf(fp, "//\t\t\tHidden classifier %i at 0x%08X has %i nodes/features\n",
                         j, (unsigned int)hid_classifier, feature_count);

                 for (k = 0; k < feature_count; k++)
                 {
                     hid_node = hid_classifier->node + k;

                     // node contents: nothing to translate besides the feature

                     // feature contents (rect[0].p0, .p1, .p2, .p3, rect[1].p0, etc.)
                     for (l = 0; l < CV_HAAR_FEATURE_MAX; l++)
                     {
                         hid_node->feature.rect[l].p0 = LOCAL_hid_ptr_to_phys(hid_node->feature.rect[l].p0);
                         hid_node->feature.rect[l].p1 = LOCAL_hid_ptr_to_phys(hid_node->feature.rect[l].p1);
                         hid_node->feature.rect[l].p2 = LOCAL_hid_ptr_to_phys(hid_node->feature.rect[l].p2);
                         hid_node->feature.rect[l].p3 = LOCAL_hid_ptr_to_phys(hid_node->feature.rect[l].p3);
                         // all four pointers are cast so the arguments match %08X
                         fprintf(fp, "//\t\t\t\tHidden node %i feature rect %i pointers translated to physical addresses (0x%08X, 0x%08X, 0x%08X, 0x%08X)\n",
                                 k, l,
                                 (unsigned int)hid_node->feature.rect[l].p0,
                                 (unsigned int)hid_node->feature.rect[l].p1,
                                 (unsigned int)hid_node->feature.rect[l].p2,
                                 (unsigned int)hid_node->feature.rect[l].p3);
                     }
                 }

                 // classifier contents (node, alpha) -- after its nodes were visited
                 hid_classifier->node  = LOCAL_hid_ptr_to_phys(hid_classifier->node);
                 hid_classifier->alpha = LOCAL_hid_ptr_to_phys(hid_classifier->alpha);
                 fprintf(fp, "//\t\t\tHidden classifier %i pointers translated to physical addresses (0x%08X, 0x%08X)\n",
                         j, (unsigned int)hid_classifier->node, (unsigned int)hid_classifier->alpha);
             }

             // stage contents (classifier, next, child, parent) -- after its classifiers
             hid_stage->classifier = LOCAL_hid_ptr_to_phys(hid_stage->classifier);
             hid_stage->next = LOCAL_hid_ptr_to_phys(hid_stage->next);
             hid_stage->child = LOCAL_hid_ptr_to_phys(hid_stage->child);
             hid_stage->parent = LOCAL_hid_ptr_to_phys(hid_stage->parent);
             fprintf(fp, "//\t\tHidden stage %i pointers translated to physical addresses (0x%08X, 0x%08X, 0x%08X, 0x%08X)\n",
                     i, (unsigned int)hid_stage->classifier, (unsigned int)hid_stage->next,
                     (unsigned int)hid_stage->child, (unsigned int)hid_stage->parent);
         }

         // cascade contents (stage_classifier, sum.refcount, sum.data.i,
         //     sqsum.refcount, sqsum.data.db, tilted.refcount, tilted.data.i, pq0, pq1, pq2, pq3,
         //     p0, p1, p2, p3, ipp_stages)
         cascade->stage_classifier = LOCAL_hid_ptr_to_phys(cascade->stage_classifier);
         cascade->sum.refcount = LOCAL_hid_ptr_to_phys(cascade->sum.refcount);
         cascade->sum.data.i = LOCAL_hid_ptr_to_phys(cascade->sum.data.i);
         cascade->sqsum.refcount = LOCAL_hid_ptr_to_phys(cascade->sqsum.refcount);
         cascade->sqsum.data.db = LOCAL_hid_ptr_to_phys(cascade->sqsum.data.db);
         cascade->tilted.refcount = LOCAL_hid_ptr_to_phys(cascade->tilted.refcount);
         cascade->tilted.data.i = LOCAL_hid_ptr_to_phys(cascade->tilted.data.i);
         fprintf(fp, "//\tHidden cascade pointers translated to physical addresses (1 of 4) (0x%08X, 0x%08X, 0x%08X)\n",
                 (unsigned int)cascade->stage_classifier, (unsigned int)cascade->sum.refcount,
                 (unsigned int)cascade->sum.data.i);
         fprintf(fp, "//\tHidden cascade pointers translated to physical addresses (2 of 4) (0x%08X, 0x%08X, 0x%08X, 0x%08X)\n",
                 (unsigned int)cascade->sqsum.refcount, (unsigned int)cascade->sqsum.data.db,
                 (unsigned int)cascade->tilted.refcount, (unsigned int)cascade->tilted.data.i);

         cascade->pq0 = LOCAL_hid_ptr_to_phys(cascade->pq0);
         cascade->pq1 = LOCAL_hid_ptr_to_phys(cascade->pq1);
         cascade->pq2 = LOCAL_hid_ptr_to_phys(cascade->pq2);
         cascade->pq3 = LOCAL_hid_ptr_to_phys(cascade->pq3);
         cascade->p0 = LOCAL_hid_ptr_to_phys(cascade->p0);
         cascade->p1 = LOCAL_hid_ptr_to_phys(cascade->p1);
         cascade->p2 = LOCAL_hid_ptr_to_phys(cascade->p2);
         cascade->p3 = LOCAL_hid_ptr_to_phys(cascade->p3);
         fprintf(fp, "//\tHidden cascade pointers translated to physical addresses (3 of 4) (0x%08X, 0x%08X, 0x%08X, 0x%08X)\n",
                 (unsigned int)cascade->pq0, (unsigned int)cascade->pq1,
                 (unsigned int)cascade->pq2, (unsigned int)cascade->pq3);
         fprintf(fp, "//\tHidden cascade pointers translated to physical addresses (4 of 4) (0x%08X, 0x%08X, 0x%08X, 0x%08X)\n",
                 (unsigned int)cascade->p0, (unsigned int)cascade->p1,
                 (unsigned int)cascade->p2, (unsigned int)cascade->p3);

         if (cascade->ipp_stages == NULL)
         {
             fprintf(fp, "//\tHidden cascade ipp stage array pointer is NULL; sub-array not traversed\n");
         }
         else
         {
             // translate the array entries first, while the array itself is
             // still reachable through its untranslated pointer
             for (i = 0; i < cascade->count; i++)
             {
                 cascade->ipp_stages[i] = LOCAL_hid_ptr_to_phys(cascade->ipp_stages[i]);
                 fprintf(fp, "//\tHidden cascade ipp stage %i translated to physical address (0x%08X)\n",
                         i, (unsigned int)cascade->ipp_stages[i]);
             }
         }

         cascade->ipp_stages = LOCAL_hid_ptr_to_phys(cascade->ipp_stages);
         fprintf(fp, "//\tHidden cascade ipp stage array pointer translated to physical address (0x%08X)\n",
                 (unsigned int)cascade->ipp_stages);
     }



 /*
  * Translate one pointer from a physical address back to a virtual address.
  * A NULL input stays NULL, so optional pointers survive the round trip.
  * The (int) cast mirrors the call convention used throughout this file.
  */
 static void *LOCAL_hid_phys_to_virt(void *phys_ptr, int size)
    {
        return phys_ptr ? (void *)Memory_getBufferVirtualAddress((int)phys_ptr, size) : NULL;
    }

 /*
  * Undo the physical-address translation applied to a "hidden" Haar cascade:
  * every embedded pointer (cascade level, stage level, classifier level and
  * the per-feature rectangle corner pointers) is converted back with
  * Memory_getBufferVirtualAddress.
  *
  * Each container pointer is translated BEFORE the elements it points to are
  * visited, because until then it holds an address that cannot be
  * dereferenced from this side.
  *
  * cascade: hidden cascade whose pointers currently hold physical addresses;
  *          NULL is accepted and is a no-op.
  */
 void LOCAL_restore_hid_cascade(CvHidHaarClassifierCascade *cascade)
    {
        CvHidHaarStageClassifier *hid_stage;
        CvHidHaarClassifier *hid_classifier;
        CvHidHaarTreeNode *hid_node;
        int classifier_count, feature_count;
        int i, j, k, l;

        if (cascade == NULL)
        {
            return;
        }

        // cascade-level pointers (stage_classifier, sum.*, sqsum.*, tilted.*, pq0..pq3, p0..p3)
        cascade->stage_classifier = LOCAL_hid_phys_to_virt(cascade->stage_classifier, sizeof(CvHidHaarStageClassifier));
        cascade->sum.refcount     = LOCAL_hid_phys_to_virt(cascade->sum.refcount, sizeof(int));
        cascade->sum.data.i       = LOCAL_hid_phys_to_virt(cascade->sum.data.i, sizeof(int));
        cascade->sqsum.refcount   = LOCAL_hid_phys_to_virt(cascade->sqsum.refcount, sizeof(int));
        cascade->sqsum.data.db    = LOCAL_hid_phys_to_virt(cascade->sqsum.data.db, sizeof(double *));
        cascade->tilted.refcount  = LOCAL_hid_phys_to_virt(cascade->tilted.refcount, sizeof(int));
        cascade->tilted.data.i    = LOCAL_hid_phys_to_virt(cascade->tilted.data.i, sizeof(int));

        cascade->pq0 = LOCAL_hid_phys_to_virt(cascade->pq0, sizeof(sqsumtype));
        cascade->pq1 = LOCAL_hid_phys_to_virt(cascade->pq1, sizeof(sqsumtype));
        cascade->pq2 = LOCAL_hid_phys_to_virt(cascade->pq2, sizeof(sqsumtype));
        cascade->pq3 = LOCAL_hid_phys_to_virt(cascade->pq3, sizeof(sqsumtype));

        cascade->p0 = LOCAL_hid_phys_to_virt(cascade->p0, sizeof(sumtype));
        cascade->p1 = LOCAL_hid_phys_to_virt(cascade->p1, sizeof(sumtype));
        cascade->p2 = LOCAL_hid_phys_to_virt(cascade->p2, sizeof(sumtype));
        cascade->p3 = LOCAL_hid_phys_to_virt(cascade->p3, sizeof(sumtype));

        // The loop conditions skip a level whose array pointer is NULL instead of dereferencing it.
        for (i = 0; cascade->stage_classifier != NULL && i < cascade->count; i++)
        {
            hid_stage = cascade->stage_classifier + i;
            classifier_count = hid_stage->count;

            // stage-level pointers (classifier, next, child, parent)
            hid_stage->classifier = LOCAL_hid_phys_to_virt(hid_stage->classifier, sizeof(CvHidHaarClassifier));
            hid_stage->next       = LOCAL_hid_phys_to_virt(hid_stage->next, sizeof(CvHidHaarStageClassifier));
            hid_stage->child      = LOCAL_hid_phys_to_virt(hid_stage->child, sizeof(CvHidHaarStageClassifier));
            hid_stage->parent     = LOCAL_hid_phys_to_virt(hid_stage->parent, sizeof(CvHidHaarStageClassifier));

            for (j = 0; hid_stage->classifier != NULL && j < classifier_count; j++)
            {
                hid_classifier = hid_stage->classifier + j;
                feature_count = hid_classifier->count;

                // classifier-level pointers (node, alpha)
                hid_classifier->node  = LOCAL_hid_phys_to_virt(hid_classifier->node, sizeof(CvHidHaarTreeNode));
                hid_classifier->alpha = LOCAL_hid_phys_to_virt(hid_classifier->alpha, sizeof(float));

                for (k = 0; hid_classifier->node != NULL && k < feature_count; k++)
                {
                    hid_node = hid_classifier->node + k;

                    // the node itself holds no pointers; only its feature rectangles do
                    for (l = 0; l < CV_HAAR_FEATURE_MAX; l++)
                    {
                        hid_node->feature.rect[l].p0 = LOCAL_hid_phys_to_virt(hid_node->feature.rect[l].p0, sizeof(sumtype));
                        hid_node->feature.rect[l].p1 = LOCAL_hid_phys_to_virt(hid_node->feature.rect[l].p1, sizeof(sumtype));
                        hid_node->feature.rect[l].p2 = LOCAL_hid_phys_to_virt(hid_node->feature.rect[l].p2, sizeof(sumtype));
                        hid_node->feature.rect[l].p3 = LOCAL_hid_phys_to_virt(hid_node->feature.rect[l].p3, sizeof(sumtype));
                    }
                }
            }
        }

        if (cascade->ipp_stages == NULL)
        {
            printf( "//\tHidden cascade ipp stage array pointer is NULL; sub-array not traversed\n");
        }
        else
        {
            // Translate the array pointer first: traverse_and_translate_hid_cascade
            // converts the elements and then the array pointer, so the restore has
            // to run in the opposite order before the elements can be reached.
            cascade->ipp_stages = LOCAL_hid_phys_to_virt(cascade->ipp_stages, sizeof(void *));

            for (i = 0; cascade->ipp_stages != NULL && i < cascade->count; i++)
            {
                cascade->ipp_stages[i] = LOCAL_hid_phys_to_virt(cascade->ipp_stages[i], sizeof(void *));
            }
        }
    }



    

       /*
        * Walk a Haar classifier cascade and replace every embedded pointer with
        * the value returned by Memory_getBufferPhysicalAddress, so the structure
        * can be handed to C6accel_cvHaarDetectObjects. Progress is logged to
        * "dsp_cascade_traversal_log.txt"; if that file cannot be opened the log
        * goes to stderr instead, so the translation itself still happens.
        *
        * Translation is done bottom-up: a container pointer is only rewritten
        * after everything reachable through it has been visited, because it can
        * no longer be dereferenced afterwards. The "hidden" cascade is handled
        * by traverse_and_translate_hid_cascade.
        *
        * cascade: cascade to translate in place; NULL is accepted and is a no-op.
        */
       void traverse_and_translate_cascade(CvHaarClassifierCascade *cascade )
    {
        CvHaarStageClassifier *stage;
        CvHaarClassifier *classifier;
        CvHaarFeature *feature;
        int stage_count, classifier_count, feature_count;
        int i, j, k;
        FILE *log_file;
        FILE *fp;

        if (cascade == NULL)
        {
            return;
        }

        // Never pass a NULL stream to fprintf: fall back to stderr when the log file is unavailable.
        log_file = fopen("dsp_cascade_traversal_log.txt", "w+");
        fp = (log_file != NULL) ? log_file : stderr;

        stage_count = cascade->count;
        fprintf(fp, "Cascade at 0x%08X has %i stages\n",
               (unsigned int)cascade,
               stage_count);

        for (i = 0; cascade->stage_classifier != NULL && i < stage_count; i++)
        {
            stage = cascade->stage_classifier + i;
            classifier_count = stage->count;
            fprintf(fp, "\tStage %i at 0x%08X has %i classifiers\n", i, (unsigned int)stage, classifier_count);

            for (j = 0; stage->classifier != NULL && j < classifier_count; j++)
            {
                classifier = stage->classifier + j;
                feature_count = classifier->count;
                fprintf(fp, "\t\tClassifier %i at 0x%08X has %i features\n", j, (unsigned int)classifier, feature_count);

                // Features are only logged: feature->rect is an embedded array, not a
                // stored pointer, so there is nothing to translate at this level.
                for (k = 0; k < feature_count; k++)
                {
                    feature = classifier->haar_feature + k;
                    fprintf(fp, "\t\t\tFeature %i at 0x%08X rect array that begins at 0x%08X\n", k,
                            (unsigned int)feature, (unsigned int)feature->rect);
                }

                // translate classifier contents (haar_feature, threshold, left, right, alpha)
                classifier->haar_feature = classifier->haar_feature ? (void *)Memory_getBufferPhysicalAddress(classifier->haar_feature, sizeof(CvHaarFeature),NULL) : NULL;
                classifier->threshold = classifier->threshold ? (void *)Memory_getBufferPhysicalAddress(classifier->threshold,sizeof(float),NULL) : NULL;
                classifier->left = classifier->left ? (void *)Memory_getBufferPhysicalAddress(classifier->left,sizeof(int),NULL) : NULL;
                classifier->right = classifier->right ? (void *)Memory_getBufferPhysicalAddress(classifier->right,sizeof(int),NULL) : NULL;
                classifier->alpha = classifier->alpha ? (void *)Memory_getBufferPhysicalAddress(classifier->alpha,sizeof(float),NULL) : NULL;
                fprintf(fp, "\t\tClassifier %i pointers translated to physical addresses (0x%08X, 0x%08X, 0x%08X, 0x%08X, 0x%08X)\n",
                        j, (unsigned int)classifier->haar_feature, (unsigned int)classifier->threshold,
                        (unsigned int)classifier->left, (unsigned int)classifier->right,
                        (unsigned int)classifier->alpha);
                Memory_cacheWbInv( (void *)classifier, sizeof(CvHaarClassifier));
            }

            // translate stage contents (classifier) only after all of its classifiers were visited
            fprintf(fp, "\tBefore translation :Stage %i pointers translated to physical addresses (0x%08X)\n", i, (unsigned int)stage->classifier);
            stage->classifier = stage->classifier ? (void *)Memory_getBufferPhysicalAddress(stage->classifier,sizeof(CvHaarClassifier),NULL) : NULL;
            Memory_cacheWbInv( (void *)stage,sizeof(CvHaarStageClassifier));

            fprintf(fp, "\tStage %i pointers translated to physical addresses (0x%08X)\n", i, (unsigned int)stage->classifier);
        }

        // traverse "hidden" cascade, too
        traverse_and_translate_hid_cascade(cascade->hid_cascade, fp);

        // finally translate the cascade's own pointers (stage_classifier, hid_cascade)
        cascade->stage_classifier = cascade->stage_classifier ? (void *)Memory_getBufferPhysicalAddress(cascade->stage_classifier,sizeof(CvHaarStageClassifier),NULL) : NULL;
        cascade->hid_cascade = cascade->hid_cascade ? (void *)Memory_getBufferPhysicalAddress(cascade->hid_cascade,sizeof(CvHidHaarClassifierCascade),NULL) : NULL;

        fprintf(fp, "Cascade pointers translated to physical addresses (0x%08X, 0x%08X)\n", (unsigned int)cascade->stage_classifier,
                (unsigned int)cascade->hid_cascade);

        Memory_cacheWbInvAll();

        // only close the stream this function opened; stderr must stay open
        if (log_file != NULL)
        {
            fclose(log_file);
        }
    }





/*
 * Undo traverse_and_translate_cascade: convert the pointers stored in a Haar
 * classifier cascade from physical addresses back to virtual addresses with
 * Memory_getBufferVirtualAddress, then restore the "hidden" cascade through
 * LOCAL_restore_hid_cascade.
 *
 * Restoration is top-down: each container pointer is converted before the
 * elements behind it are visited. NULL pointers are left as NULL (the forward
 * translation preserves NULL as well) and a NULL array is not traversed.
 *
 * cascade: cascade to restore in place; NULL is accepted and is a no-op.
 */
void LOCAL_restore_cascade(CvHaarClassifierCascade *cascade)
    {
        CvHaarStageClassifier *stage;
        CvHaarClassifier *classifier;
        int stage_count, classifier_count;
        int i, j;

        if (cascade == NULL)
        {
            return;
        }

        stage_count = cascade->count;

        // cascade-level pointers (stage_classifier, hid_cascade)
        cascade->stage_classifier = cascade->stage_classifier ?
            (void *)Memory_getBufferVirtualAddress(((int)cascade->stage_classifier),sizeof(CvHaarStageClassifier)+sizeof(int)) : NULL;
        cascade->hid_cascade = cascade->hid_cascade ?
            (void *)Memory_getBufferVirtualAddress((int)cascade->hid_cascade,sizeof(CvHidHaarClassifierCascade)) : NULL;

        for (i = 0; cascade->stage_classifier != NULL && i < stage_count; i++)
        {
            stage = cascade->stage_classifier + i;
            classifier_count = stage->count;

            // stage-level pointer (classifier)
            stage->classifier = stage->classifier ?
                (void *)Memory_getBufferVirtualAddress((int)stage->classifier,sizeof(CvHaarClassifier)) : NULL;

            for (j = 0; stage->classifier != NULL && j < classifier_count; j++)
            {
                classifier = stage->classifier + j;

                // classifier-level pointers (haar_feature, threshold, left, right, alpha)
                classifier->haar_feature = classifier->haar_feature ?
                    (void *)Memory_getBufferVirtualAddress((int)(classifier->haar_feature),sizeof(CvHaarFeature)) : NULL;
                classifier->threshold = classifier->threshold ?
                    (void *)Memory_getBufferVirtualAddress((int)classifier->threshold,sizeof(float)) : NULL;
                classifier->left = classifier->left ?
                    (void *)Memory_getBufferVirtualAddress((int)classifier->left,sizeof(int)) : NULL;
                classifier->right = classifier->right ?
                    (void *)Memory_getBufferVirtualAddress((int)classifier->right,sizeof(int)) : NULL;
                classifier->alpha = classifier->alpha ?
                    (void *)Memory_getBufferVirtualAddress((int)classifier->alpha,sizeof(float)) : NULL;

                // Features need no work: feature->rect is an embedded array, not a stored pointer.
            }
        }

        LOCAL_restore_hid_cascade(cascade->hid_cascade);
    }



/*
 * Run Haar object detection on the same image with the ARM implementation
 * (cvHaarDetectObjects) and with the DSP implementation
 * (C6accel_cvHaarDetectObjects), print the elapsed time and the detected
 * rectangles of each, and save the marked-up colour image to "./output.png"
 * (ARM hits: thick black rectangles, DSP hits: thin red rectangles).
 *
 * hC6accel:          C6accel handle passed to the DSP call.
 * image_file_name:   image to load (as grayscale and as colour).
 * cascade_file_name: cascade file passed to cvLoad.
 *
 * Returns 1 when the test ran to completion. Returns 0 if the image, the
 * cascade or the memory storage could not be obtained.
 * NOTE(review): 0 was picked as the failure value because success is 1;
 * confirm it matches what the test driver expects.
 */
Int c6accel_test_cvHaarDetectObjects(C6accel_Handle hC6accel, char *image_file_name, char *cascade_file_name)
{
    IplImage *image = NULL, *image_color = NULL;
    CvSeq *arm_sequence = NULL, *dsp_sequence = NULL;
    CvHaarClassifierCascade *cascade = NULL;
    CvMemStorage *storage = NULL;
    CvRect *r;
    CvPoint c1, c2;
    CvScalar red = {0, 0, 255, 255}, black = {0, 0, 0, 255};
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    int cascade_translated = 0;   /* set once the cascade holds physical addresses */
    Int status = 0;

    printf("cvHaarDetectObjects Test (%s, %s)\n", image_file_name, cascade_file_name);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);
    printf("Memory for images,cascade and \n");

    // 1. Read input image and cascade files
    image       = cvLoadImage(image_file_name, CV_LOAD_IMAGE_GRAYSCALE);
    image_color = cvLoadImage(image_file_name, CV_LOAD_IMAGE_COLOR);
    if (image == NULL || image_color == NULL)
    {
        printf("Failed to load image %s\n", image_file_name);
        goto cleanup;
    }

    // 2. Create memory storage space
    storage = cvCreateMemStorage(0);
    if (storage == NULL)
    {
        printf("Failed to create memory storage\n");
        goto cleanup;
    }
    printf("Memory allocation for images and storage done\n");

    cascade = (CvHaarClassifierCascade *)cvLoad(cascade_file_name, 0, 0, 0);
    if (cascade == NULL)
    {
        printf("Failed to load cascade %s\n", cascade_file_name);
        goto cleanup;
    }
    printf("Reading of Cascade complete\n");

    // 3.a Apply ARM algorithm (one untimed warm-up call, then the timed call)
    arm_sequence = cvHaarDetectObjects(image_color, cascade, storage, 1.1, 2, CV_HAAR_DO_CANNY_PRUNING, cvSize(30, 30));
    gettimeofday(&startTime, NULL);
    // %p with a void * argument: %x is not a valid conversion for a pointer
    printf("top= %p\n", (void *)storage->top);
    printf("bottom= %p\n", (void *)storage->bottom);
    for (i = 0; i < 1; i++)
    {
        arm_sequence = cvHaarDetectObjects(image_color, cascade, storage, 1.1, 2, CV_HAAR_DO_CANNY_PRUNING, cvSize(30, 30));
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called ARM HaarDetectObjects function (time: %f ms)\n", t_algo / 1000.0 / 1.0);

    // 4.a Mark and print list of matches detected by ARM
    if (arm_sequence == NULL)
    {
        printf("ARM sequence returned NULL\n");
    }
    else
    {
        printf("ARM sequence contains %i elements:\n", arm_sequence->total);
        for (i = 0; i < arm_sequence->total; i++)
        {
            r = (CvRect *)cvGetSeqElem(arm_sequence, i);
            printf("%4i: %4i, %4i (%ix%i)\n", i, r->x, r->y, r->width, r->height);

            // mark with thick black rectangle
            c1.x = r->x;
            c1.y = r->y;
            c2.x = r->x + r->width;
            c2.y = r->y + r->height;
            cvRectangle(image_color, c1, c2, black, 2, 8, 0);
        }
    }

    // The ARM results live in this storage; both are recreated for the DSP run.
    cvReleaseHaarClassifierCascade(&cascade);
    cvReleaseMemStorage(&storage);

    // 3.b Apply DSP algorithm; use dummy allocation to prime the storage for the DSP
    storage = cvCreateMemStorage(0);
    if (storage == NULL)
    {
        printf("Failed to create memory storage\n");
        goto cleanup;
    }
    (void)cvMemStorageAlloc(storage, 64);
    printf("Memory allocation for images and storage done\n");

    cascade = (CvHaarClassifierCascade *)cvLoad(cascade_file_name, 0, 0, 0);
    if (cascade == NULL)
    {
        printf("Failed to load cascade %s\n", cascade_file_name);
        goto cleanup;
    }
    traverse_and_translate_cascade(cascade);
    cascade_translated = 1;

    gettimeofday(&startTime, NULL);
    C6accel_cvHaarDetectObjects(hC6accel, image_color, cascade, storage, 1.1, 2, CV_HAAR_DO_CANNY_PRUNING, cvSize(30, 30), &dsp_sequence);
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

    printf("Called DSP HaarDetectObjects function (time: %f ms)\n", t_algo / 1000.0);

    // 4.b Mark and print list of matches detected by DSP
    if (dsp_sequence == NULL)
    {
        printf("DSP sequence returned NULL\n");
    }
    else
    {
        printf("DSP sequence contains %i elements:\n", dsp_sequence->total);
        for (i = 0; i < dsp_sequence->total; i++)
        {
            r = (CvRect *)cvGetSeqElem(dsp_sequence, i);
            printf("%4i: %4i, %4i (%ix%i)\n", i, r->x, r->y, r->width, r->height);

            // mark with thin red rectangle
            c1.x = r->x;
            c1.y = r->y;
            c2.x = r->x + r->width;
            c2.y = r->y + r->height;
            cvRectangle(image_color, c1, c2, red, 1, 8, 0);
        }
    }

    // 7. Save marked image to filesystem
    cvSaveImage("./output.png", image_color, 0);
    status = 1;

cleanup:
    cvReleaseImage(&image);
    cvReleaseImage(&image_color);

    // The cascade must hold virtual addresses again before OpenCV frees it.
    if (cascade_translated)
    {
        LOCAL_restore_cascade(cascade);
    }
    cvReleaseHaarClassifierCascade(&cascade);
    cvReleaseMemStorage(&storage);

    if (status == 1)
    {
        printf("C6accel_cvHaarDetectObjects test completed successfully\n");
    }
    return status;
}



/*
 * Round-trip test for the cascade pointer translation: load a Haar cascade,
 * translate its pointers with traverse_and_translate_cascade, restore them
 * with LOCAL_restore_cascade and release the cascade.
 *
 * cascade_file_name: cascade file to load. When it is NULL or empty the
 *                    previously hard-coded default
 *                    "opencv_images/haarcascade_frontalface_alt2.xml" is used.
 *
 * Returns 1 when the round trip completed, 0 when the cascade could not be
 * loaded.
 * NOTE(review): 0 was picked as the failure value because success is 1;
 * confirm it matches what the test driver expects.
 */
Int c6accel_test_Cascade(char *cascade_file_name)
{
    const char *cascade_path = "opencv_images/haarcascade_frontalface_alt2.xml";
    CvHaarClassifierCascade *cascade;

    // Honour the caller's file name; it used to be silently ignored.
    if (cascade_file_name != NULL && cascade_file_name[0] != '\0')
    {
        cascade_path = cascade_file_name;
    }

    printf("Reading Cascade\n");
    cascade = (CvHaarClassifierCascade *)cvLoad(cascade_path, 0, 0, 0);
    if (cascade == NULL)
    {
        printf("Failed to load cascade %s\n", cascade_path);
        return 0;
    }
    printf("Reading of Cascade complete\n");

    traverse_and_translate_cascade(cascade);
    printf("Traverse and translate complete\n");
    printf("count: %x\n", cascade->count);

    // Restore virtual addresses before OpenCV walks the structure to free it.
    LOCAL_restore_cascade(cascade);

    cvReleaseHaarClassifierCascade(&cascade);

    printf("C6accel_Cascade test completed successfully\n");
    return 1;
}





/*
 * Benchmark corner detection and sub-pixel corner refinement on the ARM
 * (cvGoodFeaturesToTrack, cvFindCornerSubPix) against the DSP
 * (C6accel_cvGoodFeaturesToTrack, C6accel_cvFindCornerSubPix). Each function
 * is called once untimed and then n times timed; the average time per call is
 * printed. The corners found by the DSP are drawn as small red squares and the
 * result is saved to "./output.png".
 *
 * hC6accel:        C6accel handle passed to the DSP calls.
 * input_file_name: image to load (as grayscale for processing and as colour
 *                  for the marked-up output).
 * n:               number of timed iterations; also the divisor for the
 *                  reported averages, so it is expected to be positive.
 *
 * Returns 1 when the test ran to completion, 0 when the image could not be
 * loaded.
 * NOTE(review): 0 was picked as the failure value because success is 1;
 * confirm it matches what the test driver expects.
 */
Int c6accel_test_cvGoodFeaturesToTrack(C6accel_Handle hC6accel, char *input_file_name, int n)
{
    IplImage *input_image, *output_image, *eig_image, *temp_image;
    CvPoint2D32f *arm_corners, *dsp_corners, *arm_corners_rough, *dsp_corners_rough;
    int arm_cornerCount = 256, dsp_cornerCount = 256;
    // black is only referenced by the disabled ARM marking code in step 5.a
    CvScalar red = {0, 0, 255, 255}, black = {0, 0, 0, 255};
    CvPoint c1, c2;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;

    printf("cvGoodFeaturesToTrack Test (%s, %i)\n", input_file_name, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input image and create working images
    input_image  = cvLoadImage(input_file_name, CV_LOAD_IMAGE_GRAYSCALE);
    output_image = cvLoadImage(input_file_name, CV_LOAD_IMAGE_COLOR);
    if (input_image == NULL || output_image == NULL)
    {
        // cvGetSize below must not be reached with a NULL image
        printf("Failed to load image %s\n", input_file_name);
        cvReleaseImage(&input_image);
        cvReleaseImage(&output_image);
        return 0;
    }
    eig_image    = cvCreateImage(cvGetSize(input_image), 32, 1);
    temp_image   = cvCreateImage(cvGetSize(input_image), 32, 1);

    // 2. Allocate output buffers
    arm_corners = (CvPoint2D32f *)cvAlloc(256 * sizeof(CvPoint2D32f));
    dsp_corners = (CvPoint2D32f *)cvAlloc(256 * sizeof(CvPoint2D32f));
    arm_corners_rough = (CvPoint2D32f *)cvAlloc(256 * sizeof(CvPoint2D32f));
    dsp_corners_rough = (CvPoint2D32f *)cvAlloc(256 * sizeof(CvPoint2D32f));

    // 3.a Apply ARM algorithm to find features (one untimed call, then n timed calls)
    cvGoodFeaturesToTrack(input_image, eig_image, temp_image, arm_corners, &arm_cornerCount, 0.01, 10.0, NULL, 3, 0, 0.04);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvGoodFeaturesToTrack(input_image, eig_image, temp_image, arm_corners, &arm_cornerCount, 0.01, 10.0, NULL, 3, 0, 0.04);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called ARM GoodFeaturesToTrack function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 3.b Apply DSP algorithm to find features (one untimed call, then n timed calls)
    C6accel_cvGoodFeaturesToTrack(hC6accel, input_image, eig_image, temp_image, dsp_corners,
                                  &dsp_cornerCount, 0.01, 10.0, NULL, 3, 0, 0.04);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvGoodFeaturesToTrack(hC6accel, input_image, eig_image, temp_image, dsp_corners,
                                      &dsp_cornerCount, 0.01, 10.0, NULL, 3, 0, 0.04);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called DSP GoodFeaturesToTrack function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 4. Save rough corner locations so we can accurately benchmark the refinement functions
    memcpy(arm_corners_rough, arm_corners, arm_cornerCount * sizeof(CvPoint2D32f));
    memcpy(dsp_corners_rough, dsp_corners, dsp_cornerCount * sizeof(CvPoint2D32f));

    // 4.a Apply ARM algorithm to refine features
    // cvFindCornerSubPix issue to be resolved.
    // NOTE(review): this untimed call runs the ARM function on the DSP corner
    // list; dsp_corners is reloaded from dsp_corners_rough before every timed
    // DSP call below. Confirm whether arm_corners was intended here.
    cvFindCornerSubPix(input_image, dsp_corners, dsp_cornerCount, cvSize(10, 10), cvSize(-1, -1),
                                   cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03));
    t_algo = 0;
    for (i = 0; i < n; i++)
    {
        // restart from the rough corners so every iteration does the same work
        memcpy(arm_corners, arm_corners_rough, arm_cornerCount * sizeof(CvPoint2D32f));
        printf("ARM Benchmark begin \n");
        gettimeofday(&startTime, NULL);
        cvFindCornerSubPix(input_image, arm_corners, arm_cornerCount, cvSize(10, 10), cvSize(-1, -1),
                           cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03));
        gettimeofday(&endTime, NULL);
        t_algo += (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    }
    printf("Called ARM FindCornerSubPix function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 4.b Apply DSP algorithm to refine features
    C6accel_cvFindCornerSubPix(hC6accel, input_image, dsp_corners, dsp_cornerCount, cvSize(10, 10), cvSize(-1, -1),
                               cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03));
    t_algo = 0;
    for (i = 0; i < n; i++)
    {
        // restart from the rough corners so every iteration does the same work
        memcpy(dsp_corners, dsp_corners_rough, dsp_cornerCount * sizeof(CvPoint2D32f));
        gettimeofday(&startTime, NULL);
        C6accel_cvFindCornerSubPix(hC6accel, input_image, dsp_corners, dsp_cornerCount, cvSize(10, 10), cvSize(-1, -1),
                                   cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03));
        gettimeofday(&endTime, NULL);
        t_algo += (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    }
    printf("Called DSP FindCornerSubPix function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 5.a Mark features detected by ARM and report number of elements (disabled)
   /* printf("ARM list contains %i features.\n", arm_cornerCount);
    for (i = 0; i < arm_cornerCount; i++)
    {
        // mark with large black square
        c1.x = cvRound(arm_corners[i].x) - 2;
        c1.y = cvRound(arm_corners[i].y) - 2;
        c2.x = cvRound(arm_corners[i].x) + 2;
        c2.y = cvRound(arm_corners[i].y) + 2;
        cvRectangle(output_image, c1, c2, black, -1, 8, 0);
    }*/

    // 5.b Mark and print list of matches detected by DSP
    printf("DSP list contains %i features.\n", dsp_cornerCount);
    for (i = 0; i < dsp_cornerCount; i++)
    {
        // mark with small red square
        c1.x = cvRound(dsp_corners[i].x) - 1;
        c1.y = cvRound(dsp_corners[i].y) - 1;
        c2.x = cvRound(dsp_corners[i].x) + 1;
        c2.y = cvRound(dsp_corners[i].y) + 1;
        cvRectangle(output_image, c1, c2, red, -1, 8, 0);
    }

    // 5. Save marked image to filesystem
    cvSaveImage("./output.png", output_image, 0);
    printf("Saved image\n");

    // 6. Free memory allocated for the images
    cvReleaseImage(&output_image);
    cvReleaseImage(&eig_image);
    cvReleaseImage(&input_image);
    cvReleaseImage(&temp_image);

    cvFree(&arm_corners);
    cvFree(&dsp_corners);
    printf("dsp_corners free\n");

    // CMEM_getPhys returns an integer address, not an unsigned int: cast and print it as unsigned long
    printf("dsp_corner_phys: %lx\n", (unsigned long)CMEM_getPhys(dsp_corners_rough));
    cvFree(&dsp_corners_rough);

    cvFree(&arm_corners_rough);

    printf("C6accel_cvGoodFeaturesToTrack test completed successfully\n");
    return 1;
}



Int c6accel_test_cvCalcOpticalFlowPyrLK(C6accel_Handle hC6accel, char *input_file_name_1, char *input_file_name_2, int n)

{

    IplImage *prev_input_image, *curr_input_image, *eig_image, *temp_image,

             *prev_pyramid, *curr_pyramid,

             *output_image;

    CvPoint2D32f *prev_corners, *arm_curr_corners, *dsp_curr_corners;

    char *arm_status, *dsp_status;

    int arm_cornerCount = 1024, dsp_cornerCount = 1024;

    CvScalar red = {0, 0, 255, 255}, black = {0, 0, 0, 255};

    CvPoint c1, c2;

    struct timeval startTime, endTime;

    int t_overhead, t_algo, i;

    

    printf("cvCalcOpticalFlowPyrLK Test (%s -> %s, %i)\n", input_file_name_1, input_file_name_2, n);

    

    // initialize timer

    t_overhead = get_overhead_time();

    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    

    // 1. Read input image and create working images

    prev_input_image = cvLoadImage(input_file_name_1, CV_LOAD_IMAGE_GRAYSCALE);

    curr_input_image = cvLoadImage(input_file_name_2, CV_LOAD_IMAGE_GRAYSCALE);

    output_image     = cvLoadImage(input_file_name_2, CV_LOAD_IMAGE_COLOR);

    

    eig_image        = cvCreateImage(cvGetSize(prev_input_image), 32, 1);

    temp_image       = cvCreateImage(cvGetSize(prev_input_image), 32, 1);

    prev_pyramid     = cvCreateImage(cvGetSize(prev_input_image), 8, 1);

    curr_pyramid     = cvCreateImage(cvGetSize(prev_input_image), 8, 1);

   

    // 2. Allocate output buffers

    prev_corners     = (CvPoint2D32f *)cvAlloc(arm_cornerCount * sizeof(CvPoint2D32f));

    arm_curr_corners = (CvPoint2D32f *)cvAlloc(arm_cornerCount * sizeof(CvPoint2D32f));

    dsp_curr_corners = (CvPoint2D32f *)cvAlloc(dsp_cornerCount * sizeof(CvPoint2D32f));

    arm_status  = (char *)cvAlloc(arm_cornerCount * sizeof(char));

    dsp_status  = (char *)cvAlloc(dsp_cornerCount * sizeof(char));

    

    // 3. Apply ARM algorithm to find features

    cvGoodFeaturesToTrack(prev_input_image, eig_image, temp_image, prev_corners, &arm_cornerCount, 0.01, 10.0, NULL, 3, 0, 0.04);

    dsp_cornerCount = arm_cornerCount;

    

    // 4.a Apply ARM algorithm to refine features

    cvCalcOpticalFlowPyrLK(prev_input_image, curr_input_image, prev_pyramid, curr_pyramid,

                           prev_corners, arm_curr_corners, arm_cornerCount, cvSize(10, 10), 3,

                           arm_status, NULL, cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03), 0);

    gettimeofday(&startTime, NULL);

    

    for (i = 0; i < n; i++)

    {

        cvCalcOpticalFlowPyrLK(prev_input_image, curr_input_image, prev_pyramid, curr_pyramid,

                               prev_corners, arm_curr_corners, arm_cornerCount, cvSize(10, 10), 3,

                               arm_status, NULL, cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03), 0);

    }

    gettimeofday(&endTime, NULL);

    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

    printf("Called ARM cvCalcOpticalFlowPyrLK function (time: %f ms)\n", t_algo / 1000.0 / n);

    

    // 4.b Apply DSP algorithm to refine features

    C6accel_cvCalcOpticalFlowPyrLK(hC6accel, prev_input_image, curr_input_image, prev_pyramid, curr_pyramid,

                                   prev_corners, dsp_curr_corners, dsp_cornerCount, cvSize(10, 10), 3,

                                   dsp_status, NULL, cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03), 0);

    gettimeofday(&startTime, NULL);

    for (i = 0; i < n; i++)

    {

        C6accel_cvCalcOpticalFlowPyrLK(hC6accel, prev_input_image, curr_input_image, prev_pyramid, curr_pyramid,

                                       prev_corners, dsp_curr_corners, dsp_cornerCount, cvSize(10, 10), 3,

                                       dsp_status, NULL, cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03), 0);

    }

    gettimeofday(&endTime, NULL);

    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

    printf("Called DSP cvCalcOpticalFlowPyrLK function (time: %f ms)\n", t_algo / 1000.0 / n);

    

    // 5.a Mark motion detected by ARM and report number of elements

    printf("ARM list contains %i features.\n", arm_cornerCount);

    for (i = 0; i < arm_cornerCount; i++)

    {

        //printf("%3i: (%10f,%10f) -> (%10f,%10f)\n", i, 

        //       arm_corners_rough[i].x, arm_corners_rough[i].y,

        //       arm_corners[i].x, arm_corners[i].y);

        // mark with thick black line

        c1.x = cvRound(prev_corners[i].x);

        c1.y = cvRound(prev_corners[i].y);

        c2.x = cvRound(arm_curr_corners[i].x);

        c2.y = cvRound(arm_curr_corners[i].y);

        cvCircle(

                    output_image,

                    c1,

                    2,

                    CVX_GRAY50,

                    -1,8,0

                );

        //cvLine(output_image, c1, c2, black, 2, 8, 0);

    }



    // 5.b Mark and print list of matches detected by DSP

    printf("DSP list contains %i features.\n", dsp_cornerCount);

    for (i = 0; i < dsp_cornerCount; i++)

    {

        //printf("%3i: (%10f,%10f) -> (%10f,%10f)\n", i, 

        //       dsp_corners_rough[i].x, dsp_corners_rough[i].y,

        //       dsp_corners[i].x, dsp_corners[i].y);

        // mark with thin red line

        c1.x = cvRound(prev_corners[i].x);

        c1.y = cvRound(prev_corners[i].y);

        c2.x = cvRound(dsp_curr_corners[i].x);

        c2.y = cvRound(dsp_curr_corners[i].y);

        cvLine(output_image, c1, c2, red, 1, 8, 0);

    }

    

    // 5. Save marked image to filesystem

    cvSaveImage("./output.png", output_image, 0);

    cvReleaseImage(&prev_input_image); 

    cvReleaseImage(&curr_input_image);

    cvReleaseImage(&output_image);    

    

    cvReleaseImage(&eig_image);       

    cvReleaseImage(&temp_image);      

    cvReleaseImage(&prev_pyramid);    

    cvReleaseImage(&curr_pyramid);    



 // 2. Allocate output buffers

    prev_corners     = cvFree(&prev_corners);

    arm_curr_corners = cvFree(&arm_curr_corners);

    dsp_curr_corners = cvFree(&dsp_curr_corners);

    arm_status  = cvFree(&arm_status);

    dsp_status  = cvFree(&dsp_status);

    printf("C6accel_cvCalcOpticalFlowPyrLK test completed successfully\n");

    return 1;

}



/*
 * Benchmark template matching on the ARM (cvMatchTemplate) against the DSP
 * (C6accel_cvMatchTemplate), both using CV_TM_SQDIFF.
 *
 *   hC6accel            handle passed through to the C6accel_* call
 *   input_file_name     image to search; loaded as grayscale
 *   template_file_name  template image; loaded as grayscale
 *   n                   number of timed iterations per implementation
 *
 * Each implementation is called once untimed and then n times between two
 * gettimeofday() samples; the average time per call is printed in ms.  Both
 * results are normalized to 0..255, their L2 difference is printed, and they
 * are saved as ./output_arm.png and ./output_dsp.png.
 *
 * Returns 1 when the test ran to completion, -1 if an image could not be
 * loaded or the template is larger than the input image.
 */
Int c6accel_test_cvMatchTemplate(C6accel_Handle hC6accel, char *input_file_name, char *template_file_name, int n)
{
    // All image pointers start as NULL so the shared cleanup path can release
    // them unconditionally (cvReleaseImage ignores a NULL image).
    IplImage *input_image = NULL, *template_image = NULL,
             *arm_output_image = NULL, *dsp_output_image = NULL,
             *arm_scale_image = NULL, *dsp_scale_image = NULL;
    CvSize output_size;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    int result = -1;

    printf("cvMatchTemplate Test (%s, %s, %i)\n", input_file_name, template_file_name, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input and template images
    input_image    = cvLoadImage(input_file_name, CV_LOAD_IMAGE_GRAYSCALE);
    template_image = cvLoadImage(template_file_name, CV_LOAD_IMAGE_GRAYSCALE);
    if (input_image == NULL || template_image == NULL)
    {
        printf("Failed to load input or template image; cvMatchTemplate Test aborted!\n");
        goto cleanup;
    }

    // 2. Allocate output images (one result per valid template position)
    output_size  = cvSize(input_image->width  - template_image->width  + 1,
                          input_image->height - template_image->height + 1);
    if (output_size.width < 1 || output_size.height < 1)
    {
        printf("Template is larger than input image; cvMatchTemplate Test aborted!\n");
        goto cleanup;
    }
    arm_output_image = cvCreateImage(output_size, 32, 1);
    arm_scale_image  = cvCreateImage(output_size, 8,  1);
    dsp_output_image = cvCreateImage(output_size, 32, 1);
    dsp_scale_image  = cvCreateImage(output_size, 8,  1);

    cvSetZero(arm_output_image);
    cvSetZero(dsp_output_image);

    // 3.a Apply ARM algorithm to match template (one untimed call first)
    cvMatchTemplate(input_image, template_image, arm_output_image, CV_TM_SQDIFF);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvMatchTemplate(input_image, template_image, arm_output_image, CV_TM_SQDIFF);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called ARM cvMatchTemplate function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 3.b Apply DSP algorithm to match template (one untimed call first)
    C6accel_cvMatchTemplate(hC6accel, input_image, template_image, dsp_output_image, CV_TM_SQDIFF);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvMatchTemplate(hC6accel, input_image, template_image, dsp_output_image, CV_TM_SQDIFF);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called DSP cvMatchTemplate function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 4. Normalize output images
    cvNormalize(arm_output_image, arm_scale_image, 0, 255, CV_MINMAX, NULL);
    cvNormalize(dsp_output_image, dsp_scale_image, 0, 255, CV_MINMAX, NULL);

    // 5. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(arm_scale_image, dsp_scale_image, CV_L2, NULL));

    // 6. Save output images to filesystem
    cvSaveImage("./output_arm.png", arm_scale_image, 0);
    cvSaveImage("./output_dsp.png", dsp_scale_image, 0);

    printf("C6accel_cvMatchTemplate test completed successfully\n");
    result = 1;

cleanup:
    // 7. Free memory allocated to images (also reached on the error paths)
    cvReleaseImage(&template_image);
    cvReleaseImage(&input_image);
    cvReleaseImage(&arm_output_image);
    cvReleaseImage(&arm_scale_image);
    cvReleaseImage(&dsp_output_image);
    cvReleaseImage(&dsp_scale_image);

    return result;
}



/*
 * Benchmark spectrum multiplication, inverse DFT and normalization on the
 * ARM (cvMulSpectrums, cvDFT, cvNormalize) against the DSP (C6accel_*).
 *
 *   hC6accel           handle passed through to the C6accel_* calls
 *   input_file_name_1  first image; loaded as grayscale
 *   input_file_name_2  second image; loaded as grayscale, must match the
 *                      first image's width and height
 *   n                  number of timed iterations per step
 *
 * The inputs are converted to 32-bit float (scaled by 1/255) and their
 * forward DFTs are computed on the ARM without timing.  Each timed step is
 * called once untimed and then n times between two gettimeofday() samples.
 * The normalized ARM and DSP results are compared (L2 norm) and saved as
 * ./output_arm.png and ./output_dsp.png.
 *
 * Returns 1 when the test ran to completion, -1 if an image could not be
 * loaded or the image sizes differ.  All images are released on every path.
 */
Int c6accel_test_cvMulSpectrums(C6accel_Handle hC6accel, char *input_file_name_1, char *input_file_name_2, int n)
{
    // NULL-initialized so the cleanup path can release everything
    // unconditionally (cvReleaseImage ignores a NULL image).
    IplImage *input_image_1 = NULL, *input_image_2 = NULL,
             *input_image_1f = NULL, *input_image_2f = NULL,
             *dft_image_1 = NULL, *dft_image_2 = NULL,
             *arm_mult_image = NULL, *dsp_mult_image = NULL,
             *arm_idft_image = NULL, *dsp_idft_image = NULL,
             *arm_norm_image = NULL, *dsp_norm_image = NULL;
    CvSize dft_size;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    int result = -1;

    printf("cvMulSpectrums Test (%s, %s, %i)\n", input_file_name_1, input_file_name_2, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input images and check that sizes match
    input_image_1 = cvLoadImage(input_file_name_1, CV_LOAD_IMAGE_GRAYSCALE);
    input_image_2 = cvLoadImage(input_file_name_2, CV_LOAD_IMAGE_GRAYSCALE);
    if (input_image_1 == NULL || input_image_2 == NULL)
    {
        printf("Failed to load input image(s); cvMulSpectrums Test aborted!\n");
        goto cleanup;
    }
    if ((input_image_1->width  != input_image_2->width)   ||
        (input_image_1->height != input_image_2->height))
    {
        printf("Image size mismatch; cvMulSpectrums Test aborted!\n");
        goto cleanup;   // release the two loaded images instead of leaking them
    }

    // 2. Allocate working and output images (and convert input images to floating point)
    dft_size = cvSize(input_image_1->width, input_image_1->height);
    input_image_1f = cvCreateImage(dft_size, IPL_DEPTH_32F, 1);
    input_image_2f = cvCreateImage(dft_size, IPL_DEPTH_32F, 1);
    dft_image_1    = cvCreateImage(dft_size, IPL_DEPTH_32F, 1);
    dft_image_2    = cvCreateImage(dft_size, IPL_DEPTH_32F, 1);
    arm_mult_image = cvCreateImage(dft_size, IPL_DEPTH_32F, 1);
    arm_idft_image = cvCreateImage(dft_size, IPL_DEPTH_32F, 1);
    arm_norm_image = cvCreateImage(dft_size, IPL_DEPTH_32F, 1);
    dsp_mult_image = cvCreateImage(dft_size, IPL_DEPTH_32F, 1);
    dsp_idft_image = cvCreateImage(dft_size, IPL_DEPTH_32F, 1);
    dsp_norm_image = cvCreateImage(dft_size, IPL_DEPTH_32F, 1);
    cvConvertScale(input_image_1, input_image_1f, 1.0f / 255.0f, 0);
    cvConvertScale(input_image_2, input_image_2f, 1.0f / 255.0f, 0);

    // 3. Apply ARM algorithm to calculate DFT for images 1, 2 (don't time)
    cvDFT(input_image_1f, dft_image_1, CV_DXT_FORWARD, 0);
    cvDFT(input_image_2f, dft_image_2, CV_DXT_FORWARD, 0);

    // 4.a Apply ARM algorithm to multiply spectrums
    cvMulSpectrums(dft_image_1, dft_image_2, arm_mult_image, 0);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvMulSpectrums(dft_image_1, dft_image_2, arm_mult_image, 0);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called ARM cvMulSpectrums function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 4.b Apply DSP algorithm to multiply spectrums
    C6accel_cvMulSpectrums(hC6accel, dft_image_1, dft_image_2, dsp_mult_image, 0);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvMulSpectrums(hC6accel, dft_image_1, dft_image_2, dsp_mult_image, 0);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called DSP cvMulSpectrums function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 5.a Apply ARM IDFT algorithm
    cvDFT(arm_mult_image, arm_idft_image, CV_DXT_INVERSE, 0);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvDFT(arm_mult_image, arm_idft_image, CV_DXT_INVERSE, 0);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called ARM cvDFT function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 5.b Apply DSP IDFT algorithm
    C6accel_cvDFT(hC6accel, dsp_mult_image, dsp_idft_image, CV_DXT_INVERSE, 0);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvDFT(hC6accel, dsp_mult_image, dsp_idft_image, CV_DXT_INVERSE, 0);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called DSP cvDFT function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 6.a Apply ARM algorithm to normalize image
    cvNormalize(arm_idft_image, arm_norm_image, 0, 255, CV_MINMAX, NULL);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvNormalize(arm_idft_image, arm_norm_image, 0, 255, CV_MINMAX, NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called ARM cvNormalize function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 6.b Apply DSP algorithm to normalize image
    C6accel_cvNormalize(hC6accel, dsp_idft_image, dsp_norm_image, 0, 255, CV_MINMAX, NULL);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvNormalize(hC6accel, dsp_idft_image, dsp_norm_image, 0, 255, CV_MINMAX, NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called DSP cvNormalize function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 7. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(arm_norm_image, dsp_norm_image, CV_L2, NULL));

    // 8. Save output images to filesystem
    cvSaveImage("./output_arm.png", arm_norm_image, 0);
    cvSaveImage("./output_dsp.png", dsp_norm_image, 0);

    printf("C6accel_cvMulSpectrums test completed successfully\n");
    result = 1;

cleanup:
    // 9. Free memory allocated to the images (also reached on the error paths)
    cvReleaseImage(&input_image_1);
    cvReleaseImage(&input_image_2);
    cvReleaseImage(&dft_image_1);
    cvReleaseImage(&dft_image_2);
    cvReleaseImage(&input_image_1f);
    cvReleaseImage(&input_image_2f);
    cvReleaseImage(&arm_mult_image);
    cvReleaseImage(&arm_idft_image);
    cvReleaseImage(&arm_norm_image);
    cvReleaseImage(&dsp_mult_image);
    cvReleaseImage(&dsp_idft_image);
    cvReleaseImage(&dsp_norm_image);

    return result;
}



/*
 * Benchmark min/max location and L2 norm on the ARM (cvMinMaxLoc, cvNorm)
 * against the DSP (C6accel_cvMinMaxLoc, C6accel_cvNorm).
 *
 *   hC6accel         handle passed through to the C6accel_* calls
 *   input_file_name  image to analyse; loaded as grayscale
 *   n                number of timed iterations per step
 *
 * Each step is called once untimed and then n times between two
 * gettimeofday() samples; the average time per call is printed in ms.  The
 * statistics computed by each side are printed for manual comparison.
 *
 * Returns 1 when the test ran to completion, -1 if the image could not be
 * loaded.
 */
Int c6accel_test_cvNorm(C6accel_Handle hC6accel, char *input_file_name, int n)
{
    IplImage *input_image;
    double arm_min_val, arm_max_val, arm_norm;
    CvPoint arm_min_loc, arm_max_loc;
    // DSP results are received through out-pointers; start them at zero so
    // the final report never prints indeterminate values should a C6accel
    // call leave them unwritten.
    double dsp_min_val = 0.0, dsp_max_val = 0.0, dsp_norm = 0.0;
    CvPoint dsp_min_loc = {0, 0}, dsp_max_loc = {0, 0};
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;

    printf("cvNorm Test (%s, %i)\n", input_file_name, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input image
    input_image = cvLoadImage(input_file_name, CV_LOAD_IMAGE_GRAYSCALE);
    if (input_image == NULL)
    {
        printf("Failed to load input image; cvNorm Test aborted!\n");
        return -1;
    }

    // 2.a Apply ARM algorithm to find min/max pixels
    cvMinMaxLoc(input_image, &arm_min_val, &arm_max_val, &arm_min_loc, &arm_max_loc, NULL);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvMinMaxLoc(input_image, &arm_min_val, &arm_max_val, &arm_min_loc, &arm_max_loc, NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called ARM cvMinMaxLoc function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 2.b Apply DSP algorithm to find min/max pixels
    C6accel_cvMinMaxLoc(hC6accel, input_image, &dsp_min_val, &dsp_max_val, &dsp_min_loc, &dsp_max_loc, NULL);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvMinMaxLoc(hC6accel, input_image, &dsp_min_val, &dsp_max_val, &dsp_min_loc, &dsp_max_loc, NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called DSP cvMinMaxLoc function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 3.a Apply ARM algorithm to find norm
    arm_norm = cvNorm(input_image, NULL, CV_L2, NULL);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        arm_norm = cvNorm(input_image, NULL, CV_L2, NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called ARM cvNorm function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 3.b Apply DSP algorithm to find norm (result returned via &dsp_norm)
    C6accel_cvNorm(hC6accel, input_image, NULL, CV_L2, NULL, &dsp_norm);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvNorm(hC6accel, input_image, NULL, CV_L2, NULL, &dsp_norm);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called DSP cvNorm function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 4. Compare outputs
    printf("ARM image statistics:\n\tL2 Norm:\t%f\n\tMin Val:\t%f\t(%i, %i)\n\tMax Val:\t%f\t(%i, %i)\n",
           arm_norm, arm_min_val, arm_min_loc.x, arm_min_loc.y, arm_max_val, arm_max_loc.x, arm_max_loc.y);
    printf("DSP image statistics:\n\tL2 Norm:\t%f\n\tMin Val:\t%f\t(%i, %i)\n\tMax Val:\t%f\t(%i, %i)\n",
           dsp_norm, dsp_min_val, dsp_min_loc.x, dsp_min_loc.y, dsp_max_val, dsp_max_loc.x, dsp_max_loc.y);

    // 5. Free memory allocated to the image
    cvReleaseImage(&input_image);
    printf("C6accel_cvNorm test completed successfully\n");

    return 1;
}



/*
 * Benchmark the integral image on the ARM (cvIntegral) against the DSP
 * (C6accel_cvIntegral).
 *
 *   hC6accel         handle passed through to the C6accel_* call
 *   input_file_name  image to integrate; loaded as grayscale
 *   n                number of timed iterations per implementation
 *
 * The sum images are one pixel wider and taller than the input and hold
 * 32-bit signed values.  Each implementation is called once untimed and then
 * n times between two gettimeofday() samples.  Both sums are normalized to
 * 0..255, their L2 difference is printed, and they are saved as
 * ./output_arm.png and ./output_dsp.png.
 *
 * Returns 1 when the test ran to completion, -1 if the image could not be
 * loaded.
 */
Int c6accel_test_cvIntegral(C6accel_Handle hC6accel, char *input_file_name, int n)
{
    IplImage *input_image, *arm_sum_image, *arm_norm_image,
             *dsp_sum_image, *dsp_norm_image;
    CvSize integral_size;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;

    printf("cvIntegral Test (%s, %i)\n", input_file_name, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input image
    input_image = cvLoadImage(input_file_name, CV_LOAD_IMAGE_GRAYSCALE);
    if (input_image == NULL)
    {
        printf("Failed to load input image; cvIntegral Test aborted!\n");
        return -1;
    }

    // 2. Create output images (integral image is (W+1) x (H+1))
    integral_size = cvSize(input_image->width + 1, input_image->height + 1);
    arm_sum_image =  cvCreateImage(integral_size, IPL_DEPTH_32S, 1);
    arm_norm_image = cvCreateImage(integral_size, IPL_DEPTH_32S, 1);
    dsp_sum_image =  cvCreateImage(integral_size, IPL_DEPTH_32S, 1);
    dsp_norm_image = cvCreateImage(integral_size, IPL_DEPTH_32S, 1);

    // 3.a Apply ARM integral algorithm
    cvIntegral(input_image, arm_sum_image, NULL, NULL);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvIntegral(input_image, arm_sum_image, NULL, NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called ARM cvIntegral function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 3.b Apply DSP integral algorithm
    C6accel_cvIntegral(hC6accel, input_image, dsp_sum_image, NULL, NULL);
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvIntegral(hC6accel, input_image, dsp_sum_image, NULL, NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    printf("Called DSP cvIntegral function (time: %f ms)\n", t_algo / 1000.0 / n);

    // 4. Normalize output images
    cvNormalize(arm_sum_image, arm_norm_image, 0, 255, CV_MINMAX, NULL);
    cvNormalize(dsp_sum_image, dsp_norm_image, 0, 255, CV_MINMAX, NULL);

    // 5. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(arm_norm_image, dsp_norm_image, CV_L2, NULL));

    // 6. Save output images to filesystem
    cvSaveImage("./output_arm.png", arm_norm_image, 0);
    cvSaveImage("./output_dsp.png", dsp_norm_image, 0);

    // 7. Free memory allocated to images
    cvReleaseImage(&input_image);
    cvReleaseImage(&arm_sum_image);
    cvReleaseImage(&arm_norm_image);
    cvReleaseImage(&dsp_sum_image);
    cvReleaseImage(&dsp_norm_image);

    printf("C6accel_cvIntegral test completed successfully\n");
    return 1;
}



/*
 * Benchmark per-element image addition on the ARM (cvAdd) against the DSP
 * (C6accel_cvAdd).
 *
 *   hC6accel          handle passed through to the C6accel_* call
 *   input_file_name1  first operand; loaded as a color image
 *   input_file_name2  second operand; loaded as a color image, must match
 *                     the first image's width and height
 *   n                 number of timed iterations per implementation
 *
 * Each implementation is called once untimed and then n times between two
 * gettimeofday() samples; the average time per call is printed in ms.  The
 * L2 difference of the two results is printed and they are saved as
 * ./output_arm.png and ./output_dsp.png.
 *
 * Returns 1 when the test ran to completion, -1 if an image could not be
 * loaded or the image sizes differ.
 */
Int c6accel_test_cvAdd(C6accel_Handle hC6accel, char *input_file_name1,char *input_file_name2, int n)
{
    // NULL-initialized so the cleanup path can release everything
    // unconditionally (cvReleaseImage ignores a NULL image).
    IplImage *inputImg1 = NULL, *inputImg2 = NULL,
             *outputImg_arm = NULL, *outputImg_dsp = NULL;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;
    int result = -1;

    printf("cvAdd Test (%s, %i iterations)\n", input_file_name1, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input images from file
    inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR);
    inputImg2 = cvLoadImage( input_file_name2, CV_LOAD_IMAGE_COLOR);
    if (inputImg1 == NULL || inputImg2 == NULL)
    {
        printf("Failed to load input image(s); cvAdd Test aborted!\n");
        goto cleanup;
    }
    // The outputs are sized from image 1 only, so both operands must agree.
    if ((inputImg1->width  != inputImg2->width) ||
        (inputImg1->height != inputImg2->height))
    {
        printf("Image size mismatch; cvAdd Test aborted!\n");
        goto cleanup;
    }

    // 2. Allocate output images (must have same depth, channels as input)
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);
    outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);

    // 3.a Apply ARM algorithm
    cvAdd(inputImg1,inputImg2,outputImg_arm,NULL); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvAdd(inputImg1,inputImg2, outputImg_arm, NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM Add function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 3.b Apply DSP algorithm
    C6accel_cvAdd(hC6accel, inputImg1,inputImg2,outputImg_dsp,NULL); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvAdd(hC6accel, inputImg1, inputImg2, outputImg_dsp,NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP Add function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

    // 5. Save outputs to filesystem
    cvSaveImage("./output_arm.png", outputImg_arm, 0);
    cvSaveImage("./output_dsp.png", outputImg_dsp, 0);

    printf("C6accel_cvAdd test completed successfully; outputs saved to filesystem\n");
    result = 1;

cleanup:
    // 6. Free memory allocated to images (also reached on the error paths)
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&inputImg2);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);

    return result;
}



/*
 * Benchmark image-plus-scalar addition on the ARM (cvAddS) against the DSP
 * (C6accel_cvAddS).  The scalar added to every pixel is {0, 0, 255, 255}.
 *
 *   hC6accel          handle passed through to the C6accel_* call
 *   input_file_name1  input image; loaded as a color image
 *   n                 number of timed iterations per implementation
 *
 * Each implementation is called once untimed and then n times between two
 * gettimeofday() samples; the average time per call is printed in ms.  The
 * L2 difference of the two results is printed and they are saved as
 * ./output_arm.png and ./output_dsp.png.
 *
 * Returns 1 when the test ran to completion, -1 if the image could not be
 * loaded.
 */
Int c6accel_test_cvAddS(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
    IplImage *inputImg1, *outputImg_arm, *outputImg_dsp;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;
    CvScalar red = {0, 0, 255, 255};

    printf("cvAddS Test (%s, %i iterations)\n", input_file_name1, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input image from file
    inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR);
    if (inputImg1 == NULL)
    {
        printf("Failed to load input image; cvAddS Test aborted!\n");
        return -1;
    }

    // 2. Allocate output images (must have same depth, channels as input)
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);
    outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);

    // 3.a Apply ARM algorithm
    cvAddS(inputImg1,red,outputImg_arm,NULL); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvAddS(inputImg1,red, outputImg_arm, NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    // Label matches the function under test (was reported as "Add").
    printf("Called ARM AddS function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 3.b Apply DSP algorithm
    C6accel_cvAddS(hC6accel, inputImg1,red,outputImg_dsp,NULL); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvAddS(hC6accel, inputImg1, red, outputImg_dsp,NULL);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP AddS function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

    // 5. Save outputs to filesystem
    cvSaveImage("./output_arm.png", outputImg_arm, 0);
    cvSaveImage("./output_dsp.png", outputImg_dsp, 0);

    // 6. Free memory allocated to images
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);

    printf("C6accel_cvAddS test completed successfully; outputs saved to filesystem\n");
    return 1;
}



/*
 * Benchmark per-element absolute difference of two images on the ARM
 * (cvAbsDiff) against the DSP (C6accel_cvAbsDiff).
 *
 *   hC6accel          handle passed through to the C6accel_* call
 *   input_file_name1  first operand; loaded as a color image
 *   input_file_name2  second operand; loaded as a color image, must match
 *                     the first image's width and height
 *   n                 number of timed iterations per implementation
 *
 * Each implementation is called once untimed and then n times between two
 * gettimeofday() samples; the average time per call is printed in ms.  The
 * L2 difference of the two results is printed and they are saved as
 * ./output_arm.png and ./output_dsp.png.
 *
 * Returns 1 when the test ran to completion, -1 if an image could not be
 * loaded or the image sizes differ.
 */
Int c6accel_test_cvAbsDiff(C6accel_Handle hC6accel, char *input_file_name1,char *input_file_name2, int n)
{
    // NULL-initialized so the cleanup path can release everything
    // unconditionally (cvReleaseImage ignores a NULL image).
    IplImage *inputImg1 = NULL, *inputImg2 = NULL,
             *outputImg_arm = NULL, *outputImg_dsp = NULL;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;
    int result = -1;

    printf("cvAbsDiff Test (%s, %i iterations)\n", input_file_name1, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input images from file
    inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR);
    inputImg2 = cvLoadImage( input_file_name2, CV_LOAD_IMAGE_COLOR);
    if (inputImg1 == NULL || inputImg2 == NULL)
    {
        printf("Failed to load input image(s); cvAbsDiff Test aborted!\n");
        goto cleanup;
    }
    // The outputs are sized from image 1 only, so both operands must agree.
    if ((inputImg1->width  != inputImg2->width) ||
        (inputImg1->height != inputImg2->height))
    {
        printf("Image size mismatch; cvAbsDiff Test aborted!\n");
        goto cleanup;
    }

    // 2. Allocate output images (must have same depth, channels as input)
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);
    outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);

    // 3.a Apply ARM algorithm
    cvAbsDiff(inputImg1,inputImg2,outputImg_arm); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvAbsDiff(inputImg1,inputImg2, outputImg_arm);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM AbsDiff function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 3.b Apply DSP algorithm
    C6accel_cvAbsDiff(hC6accel, inputImg1,inputImg2,outputImg_dsp); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvAbsDiff(hC6accel, inputImg1, inputImg2, outputImg_dsp);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP AbsDiff function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

    // 5. Save outputs to filesystem
    cvSaveImage("./output_arm.png", outputImg_arm, 0);
    cvSaveImage("./output_dsp.png", outputImg_dsp, 0);

    printf("C6accel_cvAbsDiff test completed successfully; outputs saved to filesystem\n");
    result = 1;

cleanup:
    // 6. Free memory allocated to images (also reached on the error paths)
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&inputImg2);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);

    return result;
}



/*
 * Benchmark absolute difference between an image and a scalar on the ARM
 * (cvAbsDiffS) against the DSP (C6accel_cvAbsDiffS).  The scalar is
 * cvScalarAll(0.0).
 *
 *   hC6accel          handle passed through to the C6accel_* call
 *   input_file_name1  input image; loaded as a color image
 *   n                 number of timed iterations per implementation
 *
 * Each implementation is called once untimed and then n times between two
 * gettimeofday() samples; the average time per call is printed in ms.  The
 * L2 difference of the two results is printed and they are saved as
 * ./output_arm.png and ./output_dsp.png.
 *
 * Returns 1 when the test ran to completion, -1 if the image could not be
 * loaded.
 */
Int c6accel_test_cvAbsDiffS(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
    IplImage *inputImg1, *outputImg_arm, *outputImg_dsp;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;
    CvScalar value = cvScalarAll(0.0);

    printf("cvAbsDiffS Test (%s, %i iterations)\n", input_file_name1, n);

    // initialize timer
    t_overhead = get_overhead_time();
    printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

    // 1. Read input image from file
    inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR);
    if (inputImg1 == NULL)
    {
        printf("Failed to load input image; cvAbsDiffS Test aborted!\n");
        return -1;
    }

    // 2. Allocate output images (must have same depth, channels as input)
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);
    outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);

    // 3.a Apply ARM algorithm
    cvAbsDiffS(inputImg1,outputImg_arm,value); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        cvAbsDiffS(inputImg1, outputImg_arm,value);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM AbsDiffS function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 3.b Apply DSP algorithm
    // The warm-up call must write to the DSP output: writing to outputImg_arm
    // would replace the ARM reference result with DSP data and leave
    // outputImg_dsp unwritten when n == 0.
    C6accel_cvAbsDiffS(hC6accel, inputImg1,outputImg_dsp,value); // run once before timing
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++)
    {
        C6accel_cvAbsDiffS(hC6accel, inputImg1, outputImg_dsp,value);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP AbsDiffS function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

    // 5. Save outputs to filesystem
    cvSaveImage("./output_arm.png", outputImg_arm, 0);
    cvSaveImage("./output_dsp.png", outputImg_dsp, 0);

    // 6. Free memory allocated to images
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);

    printf("C6accel_cvAbsDiffS test completed successfully; outputs saved to filesystem\n");

    return 1;
}







Int C6accel_test_contours(C6accel_Handle hC6accel, char *input_file_name1, int n)

{

        IplImage *inputImg1,  *inputImg2, *outputImg_arm, *outputImg_dsp;

        IplImage*	g_gray = NULL;

        double		g_thresh = 100.0;

        CvSeq* contours = 0, *contour2;

        CvMemStorage* 	g_storage = NULL;

        int status;

        struct timeval startTime, endTime;

        int t_overhead=0, t_algo, i;

        double t_br_arm=0.0, t_dc_arm=0.0, t_ca_arm=0.0,t_br_dsp=0.0, t_dc_dsp=0.0, t_ca_dsp=0.0;

        float t_avg;

        CvScalar value = cvScalarAll(0.0);

        CvRect boundbox;

        double area,area_arm,area_dsp, Total_area_arm, Total_area_dsp;



        //Important: For DSP implementation pass pointer and allocate memroy from CMEM

        CvRect *boundbox_ptr;



        void *temp_ptr;

        printf("cvThreshold Test (%s, %i iterations)\n", input_file_name1, n);

        

       // initialize timer

         t_overhead = get_overhead_time();

         printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0);

      

       // 1. Read input images from file

        inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR);

        inputImg2 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR);

        

       //allocate memory for CvRect from CMEM

        boundbox_ptr=Memory_alloc(sizeof(CvRect), &testfxnsMemParams);



       // 2. Allocate output images (must have same depth, channels as input)

       // cvThreshold supports only single chanel output(8bit)

       outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8,1);

       outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8,1);

      

       g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height),8,1);



       //3. Create storage for the contour

        g_storage = cvCreateMemStorage(0);

        temp_ptr = cvMemStorageAlloc(g_storage, 64);

	contours = cvCreateSeq(0,sizeof(CvSeq),sizeof(CvPoint), g_storage);

	cvCvtColor( inputImg1, g_gray, CV_BGR2GRAY );

        gettimeofday(&startTime, NULL);

        // 3.a Apply ARM algorithm

        for (i = 0; i < n; i++)

           cvThreshold(g_gray, outputImg_arm , g_thresh, 255.0, CV_THRESH_BINARY );

        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

        t_avg = (float)t_algo / (float)n;



        printf("Called ARM cvThreshold function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

        // 3.b Apply DSP algorithm

        gettimeofday(&startTime, NULL);

        for (i = 0; i < n; i++)

        status = C6accel_cvThreshold(hC6accel, g_gray, outputImg_dsp , g_thresh, 255.0, CV_THRESH_BINARY );



        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

        t_avg = (float)t_algo / (float)n;



        printf("Called DSP cvThreshold function %i times (average time: %f ms)\n", n, t_avg / 1000.0);



         // 5. Compare outputs

        printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

        cvSaveImage("./output_arm_thresh.png",outputImg_arm , 0);

        cvSaveImage("./output_dsp_thresh.png",outputImg_dsp , 0);



        printf("Find Contours Called\n");

        //6 Test for drawing functions and contour features :Bounding Rect, DrawContours, ContourArea        

	C6accel_cvFindContours(hC6accel,outputImg_arm, g_storage, &contours, sizeof(CvContour),CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE,cvPoint(0,0) );

        printf("Find Contours Complete\n");

        gettimeofday(&startTime, NULL);

        boundbox = cvBoundingRect(contours,1);

        gettimeofday(&endTime, NULL);

        

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

        t_avg = (float)t_algo / (float)n;



        printf("Called ARM cvBoundingRect function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

        

        gettimeofday(&startTime, NULL);

        C6accel_cvBoundingRect(hC6accel,contours,boundbox_ptr,1);

        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

        t_avg = (float)t_algo / (float)n;    

        printf("Called DSP cvBoundingRect function %i times (average time: %f ms)\n", n, t_avg / 1000.0);





        gettimeofday(&startTime, NULL);

        cvDrawContours(inputImg1, contours,CV_RGB(255,0,0),CV_RGB(0,255,0),-1, 1,8, cvPoint(0,0) );

        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

        t_avg = (float)t_algo / (float)n;    

        printf("Called ARM cvDrawContour function %i times (average time: %f ms)\n", n, t_avg / 1000.0);



        gettimeofday(&startTime, NULL);

         C6accel_cvDrawContours(hC6accel,inputImg2, contours,CV_RGB(255,0,0),CV_RGB(0,255,0),-1, 1,8, cvPoint(0,0) );

        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

        t_avg = (float)t_algo / (float)n;    

        printf("Called DSP cvDrawContour function %i times (average time: %f ms)\n", n, t_avg / 1000.0);



        gettimeofday(&startTime, NULL);

        cvContourArea( (void *)contours,CV_WHOLE_SEQ,0);

        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

        t_avg = (float)t_algo / (float)n;    

        printf("Called ARM cvContourArea function %i times (average time: %f ms)\n", n, t_avg / 1000.0);



        gettimeofday(&startTime, NULL);

         C6accel_cvContourArea(hC6accel, contours,CV_WHOLE_SEQ,&area );

        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

        t_avg = (float)t_algo / (float)n;    

        printf("Called DSP cvContourArea function %i times (average time: %f ms)\n", n, t_avg / 1000.0);





        i=0;

        Total_area_arm = 0.0;

        Total_area_dsp = 0.0;

       

        gettimeofday(&startTime, NULL);  

        for(; contours; contours = contours->h_next) {

         if( contours ){

                i++;

                //ARM code

                boundbox = cvBoundingRect(contours,1);

                cvRectangle(inputImg1,                    

                            cvPoint(boundbox.x, boundbox.y),        

                            cvPoint(boundbox.x+boundbox.width, boundbox.y+boundbox.height),      

                            cvScalar(255,0,0,0),

                             1, 8, 0);  

        

                //DSP code

                //Find minimal bounding box for each sequence

                 C6accel_cvBoundingRect(hC6accel,contours,boundbox_ptr,1);

         

                cvRectangle(inputImg2,                    

                            cvPoint(boundbox_ptr->x, boundbox_ptr->y),        

                            cvPoint(boundbox_ptr->x+boundbox_ptr->width, boundbox_ptr->y+boundbox_ptr->height),      

                            cvScalar(255,0,0,0),

                            1, 8, 0); 

               

         

                cvDrawContours(inputImg1, contours,CV_RGB(255,0,0),CV_RGB(0,255,0),-1, 1,8, cvPoint(0,0) );

            

                C6accel_cvDrawContours(hC6accel,inputImg2, contours,CV_RGB(255,0,0),CV_RGB(0,255,0),-1, 1,8, cvPoint(0,0) );

         

                area_arm = fabs(cvContourArea( (void *)contours,CV_WHOLE_SEQ,0));

                Total_area_arm += area_arm;

                        

                C6accel_cvContourArea(hC6accel, contours,CV_WHOLE_SEQ,&area );

               

                Total_area_dsp += fabs(area);

                

	  }

     }

     gettimeofday(&endTime, NULL);

     t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

     t_avg = (float)t_algo / (float)n;    

     printf("Called DSP Plotting function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

     printf("Difference between total area calculated using ContourArea =%f\n", fabs(Total_area_dsp-Total_area_arm) );





     // 6. Save outputs to filesystem

     cvSaveImage("./output_arm_contour.png",inputImg1 , 0);

     cvSaveImage("./output_dsp_contour.png",inputImg2 , 0);

 

     //7. Free memory allocated to images

     cvReleaseImage(&inputImg1);

     cvReleaseImage(&g_gray);

     cvReleaseImage(&inputImg2);

     cvReleaseImage(&outputImg_arm);

     cvReleaseImage(&outputImg_dsp);

     cvReleaseMemStorage(&g_storage);



     printf("Test for contours completed successfully; outputs saved to filesystem\n");

     return 1;

}



/*
 * C6Accel_test_Matchshapes
 *
 * Compares cvMatchShapes (ARM) with C6accel_cvMatchShapes (DSP) on the
 * contours of two input images.
 *
 * Each image is loaded as color, converted to gray, thresholded and passed
 * to cvFindContours.  The two contour lists are then walked pair by pair:
 * one timed pass per processor, followed by an untimed pass that prints the
 * match value and draws the contour of the second image in one of two
 * colors depending on whether the value is below the 0.7 threshold.
 * Results are written to ./output_arm_matchcontour.png and
 * ./output_dsp_matchcontour.png.
 *
 * Parameters:
 *   hC6accel          handle handed to the C6accel_* wrapper calls
 *   input_file_name1  path of the first image
 *   input_file_name2  path of the second image; must have the same
 *                     dimensions as the first (the gray scratch image and
 *                     the cvCopy below are shared between them)
 *   n                 unused: each contour pair is matched once per timed
 *                     pass, and the reported count is the number of pairs
 *
 * Returns 0, both after a completed run and when the inputs are unusable.
 * NOTE(review): sibling tests return 1 on success; confirm which value the
 * caller expects from this function.
 */
Int C6Accel_test_Matchshapes(C6accel_Handle hC6accel, char *input_file_name1, char *input_file_name2,int n)
{
    IplImage *inputImg1 = NULL, *inputImg2 = NULL;
    IplImage *outputImg_arm = NULL, *outputImg_dsp = NULL;
    IplImage *g_gray = NULL;
    CvSeq *contour1 = NULL, *contour2 = NULL;
    CvSeq *first1, *first2;
    CvMemStorage *g_storage_1 = NULL;
    CvMemStorage *g_storage_2 = NULL;
    const double g_thresh = 100.0;
    const double match_limit = 0.7;     /* arbitrary threshold */
    struct timeval startTime, endTime;
    int t_overhead, t_algo;
    int pairs = 0;
    float t_avg;
    double measure = 0.0, measure_arm = 0.0;

    (void)n;

    // 1. Read input images from file
    inputImg1 = cvLoadImage(input_file_name1, CV_LOAD_IMAGE_COLOR);
    inputImg2 = cvLoadImage(input_file_name2, CV_LOAD_IMAGE_COLOR);
    if (inputImg1 == NULL || inputImg2 == NULL) {
        printf("Failed to load input images %s and %s\n", input_file_name1, input_file_name2);
        goto cleanup;
    }
    if (inputImg1->width != inputImg2->width || inputImg1->height != inputImg2->height) {
        printf("Input images %s and %s must have the same dimensions\n", input_file_name1, input_file_name2);
        goto cleanup;
    }

    // 2. Allocate single-channel 8-bit images for the threshold outputs
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8, 1);
    outputImg_dsp = cvCreateImage(cvSize(inputImg2->width, inputImg2->height), 8, 1);
    g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8, 1);

    // Cost of a back-to-back gettimeofday pair, subtracted from each measurement
    gettimeofday(&startTime, NULL);
    gettimeofday(&endTime, NULL);
    t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

    // 3. Contours of the first image
    g_storage_1 = cvCreateMemStorage(0);
    (void)cvMemStorageAlloc(g_storage_1, 64);
    contour1 = cvCreateSeq(0, sizeof(CvSeq), sizeof(CvPoint), g_storage_1);

    cvCvtColor(inputImg1, g_gray, CV_BGR2GRAY);
    cvThreshold(g_gray, outputImg_arm, g_thresh, 255.0, CV_THRESH_BINARY);
    cvFindContours(outputImg_arm, g_storage_1, &contour1, sizeof(CvContour), CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE, cvPoint(0,0));

    // 4. Contours of the second image, kept in their own storage
    g_storage_2 = cvCreateMemStorage(0);
    (void)cvMemStorageAlloc(g_storage_2, 64);
    contour2 = cvCreateSeq(0, sizeof(CvSeq), sizeof(CvPoint), g_storage_2);

    cvCvtColor(inputImg2, g_gray, CV_BGR2GRAY);
    cvThreshold(g_gray, outputImg_dsp, g_thresh, 255.0, CV_THRESH_BINARY);
    cvFindContours(outputImg_dsp, g_storage_2, &contour2, sizeof(CvContour), CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE, cvPoint(0,0));

    // Remember the list heads so every pass can restart from them
    first1 = contour1;
    first2 = contour2;

    // Both result images start from the pixels of the second input
    cvCopy(inputImg2, inputImg1, 0);

    // ARM: benchmark loop.  Stop as soon as either list runs out.
    gettimeofday(&startTime, NULL);
    for (contour1 = first1, contour2 = first2; contour1 && contour2;
         contour1 = contour1->h_next, contour2 = contour2->h_next) {
        measure_arm = cvMatchShapes(contour2, contour1, 2, 1.0);
        pairs++;
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (pairs > 0) ? (float)t_algo / (float)pairs : 0.0f;

    // ARM: draw loop
    for (contour1 = first1, contour2 = first2; contour1 && contour2;
         contour1 = contour1->h_next, contour2 = contour2->h_next) {
        measure_arm = cvMatchShapes(contour2, contour1, 2, 1.0);
        printf("match_arm = %f\n",(double)measure_arm);
        if (measure_arm < match_limit) {
            cvDrawContours(inputImg1, contour2, CV_RGB(0,255,0), CV_RGB(0,255,0), -1, 3, 8, cvPoint(0,0));
        } else if (measure_arm >= match_limit) {
            cvDrawContours(inputImg1, contour2, CV_RGB(0,0,255), CV_RGB(0,0,255), -1, 3, 8, cvPoint(0,0));
        }
    }

    printf("Called ARM cvMatchShapes function %i times (average time: %f ms)\n", pairs, t_avg / 1000.0);

    // DSP: benchmark loop
    gettimeofday(&startTime, NULL);
    for (contour1 = first1, contour2 = first2; contour1 && contour2;
         contour1 = contour1->h_next, contour2 = contour2->h_next) {
        C6accel_cvMatchShapes(hC6accel, contour1, contour2, 2, 1.0, &measure);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (pairs > 0) ? (float)t_algo / (float)pairs : 0.0f;

    printf("Called DSP cvMatchShapes function %i times (average time: %f ms)\n", pairs, t_avg / 1000.0);

    // DSP: draw loop
    for (contour1 = first1, contour2 = first2; contour1 && contour2;
         contour1 = contour1->h_next, contour2 = contour2->h_next) {
        C6accel_cvMatchShapes(hC6accel, contour1, contour2, 2, 1.0, &measure);
        printf("match_dsp = %f\n",(double)measure);
        if (measure < match_limit) {
            cvDrawContours(inputImg2, contour2, CV_RGB(0,255,0), CV_RGB(0,255,0), -1, 3, 8, cvPoint(0,0));
        } else if (measure >= match_limit) {
            cvDrawContours(inputImg2, contour2, CV_RGB(0,0,255), CV_RGB(0,0,255), -1, 3, 8, cvPoint(0,0));
        }
    }

    // 6. Save outputs to filesystem
    cvSaveImage("./output_arm_matchcontour.png", inputImg1, 0);
    cvSaveImage("./output_dsp_matchcontour.png", inputImg2, 0);

cleanup:
    // 7. Free images and contour storage (every pointer starts out NULL)
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&inputImg2);
    cvReleaseImage(&g_gray);
    cvReleaseMemStorage(&g_storage_1);
    cvReleaseMemStorage(&g_storage_2);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);

    return 0;
}

       

/*
 * C6Accel_test_FindContours
 *
 * Times one cvFindContours call on the ARM and one C6accel_cvFindContours
 * call on the DSP for the same input image, draws the contours each call
 * found and saves the two drawings to ./output_arm_contour.png and
 * ./output_dsp_contour.png.
 *
 * Parameters:
 *   hC6accel          handle handed to the C6accel_* wrapper calls
 *   input_file_name1  path of the image to process (loaded twice, once per
 *                     processor, so each drawing has its own canvas)
 *   n                 unused: each find-contours function is called once and
 *                     the report says so
 *
 * Returns 0, both after a completed run and when the image cannot be loaded.
 * NOTE(review): sibling tests return 1 on success; confirm which value the
 * caller expects from this function.
 */
Int C6Accel_test_FindContours(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
    IplImage *inputImg1 = NULL, *inputImg2 = NULL;
    IplImage *outputImg_arm = NULL, *outputImg_dsp = NULL;
    IplImage *g_gray = NULL;
    CvSeq *contour1 = NULL, *contour2 = NULL;
    CvMemStorage *g_storage_1 = NULL;
    CvMemStorage *g_storage_2 = NULL;
    const double g_thresh = 100.0;
    const int storage_size = 200*1024;  /* 200K; the size needed varies from one image to another */
    const int n_calls = 1;              /* each find-contours function is invoked once */
    struct timeval startTime, endTime;
    int t_overhead, t_algo;
    float t_avg;

    (void)n;

    // 1. Read input images from file
    inputImg1 = cvLoadImage(input_file_name1, CV_LOAD_IMAGE_COLOR);
    if (inputImg1 == NULL) {
        printf("Failed to load input image %s\n", input_file_name1);
        goto cleanup;
    }
    printf("inp_image->imagedata= %lx\n", (unsigned long)CMEM_getPhys(inputImg1->imageData));

    inputImg2 = cvLoadImage(input_file_name1, CV_LOAD_IMAGE_COLOR);
    if (inputImg2 == NULL) {
        printf("Failed to load input image %s\n", input_file_name1);
        goto cleanup;
    }

    // 2. Allocate single-channel 8-bit images for the threshold outputs
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8, 1);
    outputImg_dsp = cvCreateImage(cvSize(inputImg2->width, inputImg2->height), 8, 1);
    g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8, 1);

    // Cost of a back-to-back gettimeofday pair, subtracted from each measurement
    gettimeofday(&startTime, NULL);
    gettimeofday(&endTime, NULL);
    t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

    // ARM pass
    g_storage_1 = cvCreateMemStorage(storage_size);
    (void)cvMemStorageAlloc(g_storage_1, 64);

    cvCvtColor(inputImg1, g_gray, CV_BGR2GRAY);
    cvThreshold(g_gray, outputImg_arm, g_thresh, 255.0, CV_THRESH_BINARY);

    gettimeofday(&startTime, NULL);
    cvFindContours(outputImg_arm, g_storage_1, &contour1, sizeof(CvContour), CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE, cvPoint(0,0));
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n_calls;
    printf("Called ARM cvFindContours function %i times (average time: %f ms)\n", n_calls, t_avg / 1000.0);

    for (; contour1; contour1 = contour1->h_next) {
        cvDrawContours(inputImg1, contour1, CV_RGB(255,0,0), CV_RGB(0,255,0), -1, 1, 8, cvPoint(0,0));
    }

    // DSP pass: the seed sequence lives in the same storage that is handed
    // to the DSP call.
    g_storage_2 = cvCreateMemStorage(storage_size);
    (void)cvMemStorageAlloc(g_storage_2, 64);
    contour2 = cvCreateSeq(0, sizeof(CvSeq), sizeof(CvPoint), g_storage_2);

    cvCvtColor(inputImg2, g_gray, CV_BGR2GRAY);
    cvThreshold(g_gray, outputImg_dsp, g_thresh, 255.0, CV_THRESH_BINARY);

    gettimeofday(&startTime, NULL);
    C6accel_cvFindContours(hC6accel, outputImg_dsp, g_storage_2, &contour2, sizeof(CvContour), CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE, cvPoint(0,0));
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n_calls;
    printf("Called DSP cvFindContours function %i times (average time: %f ms)\n", n_calls, t_avg / 1000.0);
    printf("Test for cvFindContours function called successfully. Output saved to filesystem\n");

    for (; contour2; contour2 = contour2->h_next) {
        C6accel_cvDrawContours(hC6accel, inputImg2, contour2, CV_RGB(255,0,0), CV_RGB(0,255,0), -1, 1, 8, cvPoint(0,0));
    }

    // 6. Save outputs to filesystem
    cvSaveImage("./output_arm_contour.png", inputImg1, 0);
    cvSaveImage("./output_dsp_contour.png", inputImg2, 0);

cleanup:
    // 7. Free images and contour storage (every pointer starts out NULL)
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&inputImg2);
    cvReleaseImage(&g_gray);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);
    cvReleaseMemStorage(&g_storage_2);
    cvReleaseMemStorage(&g_storage_1);

    return 0;
}



/*
 * C6Accel_test_Dilate
 *
 * Runs cvDilate on the ARM and C6accel_cvDilate on the DSP n times each on
 * the same color image, prints the average time per call and the L2 norm of
 * the difference between the two results, and saves both results to
 * ./output_arm.png and ./output_dsp.png.
 *
 * Parameters:
 *   hC6accel          handle handed to the C6accel_* wrapper call
 *   input_file_name1  path of the image to process
 *   n                 number of timed calls per processor
 *
 * Returns 1 when the test ran, 0 when the image could not be loaded.
 * NOTE(review): 0 is assumed to be the failure value the caller expects.
 */
Int C6Accel_test_Dilate(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
    IplImage *inputImg1, *outputImg_arm, *outputImg_dsp;
    const int iterations = 0;   /* iteration argument given to both dilate calls */
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;

    // 1. Load input
    inputImg1 = cvLoadImage(input_file_name1, CV_LOAD_IMAGE_COLOR);
    if (inputImg1 == NULL) {
        printf("Failed to load input image %s\n", input_file_name1);
        return 0;
    }

    // 2. Allocate output images with the depth and channel count of the input
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);
    outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);

    // Cost of a back-to-back gettimeofday pair, subtracted from each measurement
    gettimeofday(&startTime, NULL);
    gettimeofday(&endTime, NULL);
    t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

    // 3. Benchmark the ARM call
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++) {
        cvDilate(inputImg1, outputImg_arm, NULL, iterations);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM cvDilate function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4. Benchmark the DSP call
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++) {
        C6accel_cvDilate(hC6accel, inputImg1, outputImg_dsp, NULL, iterations);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP cvDilate function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 5. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

    // 6. Save outputs
    cvSaveImage("./output_arm.png", outputImg_arm, 0);
    cvSaveImage("./output_dsp.png", outputImg_dsp, 0);
    printf("Test for Dilate operations done\n");

    // 7. Free memory allocated to images
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);

    return 1;
}



/*
 * C6Accel_test_Erode
 *
 * Runs cvErode on the ARM and C6accel_cvErode on the DSP n times each on
 * the same color image, prints the average time per call and the L2 norm of
 * the difference between the two results, and saves both results to
 * ./output_arm.png and ./output_dsp.png.
 *
 * Parameters:
 *   hC6accel          handle handed to the C6accel_* wrapper call
 *   input_file_name1  path of the image to process
 *   n                 number of timed calls per processor
 *
 * Returns 1 when the test ran, 0 when the image could not be loaded.
 * NOTE(review): 0 is assumed to be the failure value the caller expects.
 */
Int C6Accel_test_Erode(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
    IplImage *inputImg1, *outputImg_arm, *outputImg_dsp;
    const int iterations = 0;   /* iteration argument given to both erode calls */
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;

    // 1. Load input
    inputImg1 = cvLoadImage(input_file_name1, CV_LOAD_IMAGE_COLOR);
    if (inputImg1 == NULL) {
        printf("Failed to load input image %s\n", input_file_name1);
        return 0;
    }

    // 2. Allocate output images with the depth and channel count of the input
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);
    outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);

    // Cost of a back-to-back gettimeofday pair, subtracted from each measurement
    gettimeofday(&startTime, NULL);
    gettimeofday(&endTime, NULL);
    t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

    // 3. Benchmark the ARM call
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++) {
        cvErode(inputImg1, outputImg_arm, NULL, iterations);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM cvErode function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4. Benchmark the DSP call
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++) {
        C6accel_cvErode(hC6accel, inputImg1, outputImg_dsp, NULL, iterations);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP cvErode function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 5. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

    // 6. Save outputs
    cvSaveImage("./output_arm.png", outputImg_arm, 0);
    cvSaveImage("./output_dsp.png", outputImg_dsp, 0);
    printf("Test for Erode operations done\n");

    // 7. Free memory allocated to images
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);

    return 1;
}



/*
 * C6Accel_test_Laplace
 *
 * Times one cvLaplace call on the ARM and one C6accel_cvLaplace call on the
 * DSP (aperture size 3) on the gray version of the input image, prints the
 * time of each and the L2 norm of the difference between the two 16-bit
 * signed results, and saves both results to ./output_arm.png and
 * ./output_dsp.png.
 *
 * Parameters:
 *   hC6accel          handle handed to the C6accel_* wrapper call
 *   input_file_name1  path of the image to process
 *   n                 unused: the repeat loops are disabled in this test, so
 *                     each function is called once and the report says so
 *
 * Returns 1 when the test ran, 0 when the image could not be loaded.
 * NOTE(review): 0 is assumed to be the failure value the caller expects.
 */
Int C6Accel_test_Laplace(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
    IplImage *inputImg1, *outputImg_arm, *outputImg_dsp, *g_gray;
    const int n_calls = 1;      /* each Laplace function is invoked once */
    struct timeval startTime, endTime;
    int t_overhead, t_algo;
    float t_avg;

    (void)n;

    // 1. Load input
    inputImg1 = cvLoadImage(input_file_name1, CV_LOAD_IMAGE_COLOR);
    if (inputImg1 == NULL) {
        printf("Failed to load input image %s\n", input_file_name1);
        return 0;
    }

    // 2. Allocate 16-bit signed outputs and the 8-bit gray input
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_16S, 1);
    outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_16S, 1);
    g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_8U, 1);

    cvCvtColor(inputImg1, g_gray, CV_BGR2GRAY);

    // Cost of a back-to-back gettimeofday pair, subtracted from each measurement
    gettimeofday(&startTime, NULL);
    gettimeofday(&endTime, NULL);
    t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

    // 3. Time the ARM call
    gettimeofday(&startTime, NULL);
    cvLaplace(g_gray, outputImg_arm, 3);
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n_calls;
    printf("Called ARM cvLaplace function %i times (average time: %f ms)\n", n_calls, t_avg / 1000.0);

    // 4. Time the DSP call
    gettimeofday(&startTime, NULL);
    C6accel_cvLaplace(hC6accel, g_gray, outputImg_dsp, 3);
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n_calls;
    printf("Called DSP cvLaplace function %i times (average time: %f ms)\n", n_calls, t_avg / 1000.0);

    // 5. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

    // 6. Save outputs
    cvSaveImage("./output_arm.png", outputImg_arm, 0);
    cvSaveImage("./output_dsp.png", outputImg_dsp, 0);
    printf("Test for Laplace operations done\n");

    // 7. Free memory allocated to images
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&g_gray);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);

    return 1;
}



/*
 * C6Accel_test_PyrDown
 *
 * Runs cvPyrDown on the ARM and C6accel_cvPyrDown on the DSP n times each
 * (CV_GAUSSIAN_5x5 filter) on the input image loaded with flag 0, prints
 * the average time per call and the L2 norm of the difference between the
 * two half-size results, and saves both results to ./output_arm.png and
 * ./output_dsp.png.
 *
 * Parameters:
 *   hC6accel          handle handed to the C6accel_* wrapper call
 *   input_file_name1  path of the image to process
 *   n                 number of timed calls per processor
 *
 * Returns 1 when the test ran, 0 when the image could not be loaded.
 * NOTE(review): 0 is assumed to be the failure value the caller expects.
 */
Int C6Accel_test_PyrDown(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
    IplImage *inputImg1, *outputImg_arm, *outputImg_dsp;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;

    // 1. Load input
    inputImg1 = cvLoadImage(input_file_name1, 0);
    if (inputImg1 == NULL) {
        printf("Failed to load input image %s\n", input_file_name1);
        return 0;
    }

    // 2. Allocate single-channel 8-bit outputs at half the input width and height
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width/2, inputImg1->height/2), IPL_DEPTH_8U, 1);
    outputImg_dsp = cvCreateImage(cvSize(inputImg1->width/2, inputImg1->height/2), IPL_DEPTH_8U, 1);

    // Cost of a back-to-back gettimeofday pair, subtracted from each measurement
    gettimeofday(&startTime, NULL);
    gettimeofday(&endTime, NULL);
    t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

    // 3. Benchmark the ARM call
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++) {
        cvPyrDown(inputImg1, outputImg_arm, CV_GAUSSIAN_5x5);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM cvPyrDown function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4. Benchmark the DSP call
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++) {
        C6accel_cvPyrDown(hC6accel, inputImg1, outputImg_dsp, CV_GAUSSIAN_5x5);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP cvPyrDown function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 5. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

    // 6. Save outputs
    cvSaveImage("./output_arm.png", outputImg_arm, 0);
    cvSaveImage("./output_dsp.png", outputImg_dsp, 0);
    printf("Test for PyrDown operations done\n");

    // 7. Free memory allocated to images
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);

    return 1;
}



/*
 * C6Accel_test_Filter2D
 *
 * Runs cvFilter2D on the ARM and C6accel_cvFilter2D on the DSP n times each
 * with a fixed 5x5 float kernel on the input image loaded with flag 0,
 * prints the average time per call and the L2 norm of the difference
 * between the two results, and saves both results to ./output_arm.png and
 * ./output_dsp.png.
 *
 * The kernel coefficients are copied into a buffer obtained from
 * Memory_alloc with testfxnsMemParams, and a CvMat header is pointed at that
 * buffer so both processors read the same coefficients.
 *
 * Parameters:
 *   hC6accel          handle handed to the C6accel_* wrapper call
 *   input_file_name1  path of the image to process
 *   n                 number of timed calls per processor
 *
 * Returns 1 when the test ran, 0 when the kernel buffer could not be
 * allocated or the image could not be loaded.
 * NOTE(review): 0 is assumed to be the failure value the caller expects.
 */
Int C6Accel_test_Filter2D(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
    IplImage *inputImg1 = NULL, *outputImg_arm = NULL, *outputImg_dsp = NULL;
    CvMat *filter = NULL;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;
    Int result = 0;
    const int nFiltCols = 5, nFiltRows = 5;
    const float kernel[25] = { 0,-1, 0,1,0,
                              -1,-2,0,2,1,
                              -1,-2,1,2,1,
                              -1,-1,0,2,1,
                               0,-1,0,1,0};
    float *pkernel;

    /* Buffer for the 5x5 float kernel */
    pkernel = Memory_alloc(sizeof(kernel), &testfxnsMemParams);
    if (pkernel == NULL) {
        printf("Failed to allocate memory for the filter kernel\n");
        return 0;
    }
    memcpy(pkernel, kernel, sizeof(kernel));

    // 1. Load input
    inputImg1 = cvLoadImage(input_file_name1, 0);
    if (inputImg1 == NULL) {
        printf("Failed to load input image %s\n", input_file_name1);
        goto cleanup;
    }

    // 2. Allocate single-channel 8-bit output images
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_8U, 1);
    outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_8U, 1);

    // Header only: the data pointer is supplied by cvSetData, so no matrix
    // data buffer is allocated here that would be orphaned by that call.
    filter = cvCreateMatHeader(nFiltRows, nFiltCols, CV_32FC1);
    cvSetData(filter, pkernel, nFiltCols*sizeof(float));

    printf("Mat =%lx\n", (unsigned long)CMEM_getPhys(filter));

    // Cost of a back-to-back gettimeofday pair, subtracted from each measurement
    gettimeofday(&startTime, NULL);
    gettimeofday(&endTime, NULL);
    t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

    // 3. Benchmark the ARM call
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++) {
        cvFilter2D(inputImg1, outputImg_arm, filter, cvPoint(-1,-1));
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM cvFilter2D function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4. Benchmark the DSP call
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++) {
        C6accel_cvFilter2D(hC6accel, inputImg1, outputImg_dsp, filter, cvPoint(-1,-1));
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP cvFilter2D function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 5. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

    // 6. Save outputs
    cvSaveImage("./output_arm.png", outputImg_arm, 0);
    cvSaveImage("./output_dsp.png", outputImg_dsp, 0);

    printf("Test for Filter2D operations done\n");
    result = 1;

cleanup:
    // 7. Free images, the matrix header, then the kernel buffer it pointed at
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);
    if (filter != NULL) {
        cvReleaseMat(&filter);
    }
    Memory_free(pkernel, sizeof(kernel), &testfxnsMemParams);

    return result;
}





/*
 * C6Accel_test_Canny
 *
 * Runs cvCanny on the ARM and C6accel_cvCanny on the DSP n times each
 * (thresholds 10.0 and 100.0, aperture size 3) on the gray version of the
 * input image, prints the average time per call and the L2 norm of the
 * difference between the two results, and saves both results to
 * ./output_arm.png and ./output_dsp.png.
 *
 * Parameters:
 *   hC6accel          handle handed to the C6accel_* wrapper call
 *   input_file_name1  path of the image to process
 *   n                 number of timed calls per processor
 *
 * Returns 1 when the test ran, 0 when the image could not be loaded.
 * NOTE(review): 0 is assumed to be the failure value the caller expects.
 */
Int C6Accel_test_Canny(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
    IplImage *inputImg1, *outputImg_arm, *outputImg_dsp, *g_gray;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;

    // 1. Load input
    inputImg1 = cvLoadImage(input_file_name1, CV_LOAD_IMAGE_COLOR);
    if (inputImg1 == NULL) {
        printf("Failed to load input image %s\n", input_file_name1);
        return 0;
    }

    // 2. Allocate single-channel 8-bit outputs and the gray input
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8, 1);
    outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8, 1);
    g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8, 1);

    cvCvtColor(inputImg1, g_gray, CV_BGR2GRAY);

    // Cost of a back-to-back gettimeofday pair, subtracted from each measurement
    gettimeofday(&startTime, NULL);
    gettimeofday(&endTime, NULL);
    t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

    // 3. Benchmark the ARM call
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++) {
        cvCanny(g_gray, outputImg_arm, 10.0, 100.0, 3);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM cvCanny function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 4. Benchmark the DSP call
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++) {
        C6accel_cvCanny(hC6accel, g_gray, outputImg_dsp, 10.0, 100.0, 3);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP cvCanny function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // 5. Compare outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

    // 6. Save outputs
    cvSaveImage("./output_arm.png", outputImg_arm, 0);
    cvSaveImage("./output_dsp.png", outputImg_dsp, 0);
    printf("Test for Canny edge detection done\n");

    // 7. Free memory allocated to images
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&g_gray);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);

    return 1;
}



/*
 * C6Accel_test_CornerHarris
 *
 * Runs cvCornerHarris on the ARM and C6accel_cvCornerHarris on the DSP n
 * times each (block size 3, aperture size 3, k = 0.04) on the input image
 * loaded with flag 0, and prints the average time per call and the L2 norm
 * of the difference between the two 32-bit float responses.  Each response
 * is rescaled linearly so that its minimum maps to 0 and its maximum to 255,
 * converted to 8 bits and saved to ./output_arm.png / ./output_dsp.png.
 *
 * Parameters:
 *   hC6accel          handle handed to the C6accel_* wrapper call
 *   input_file_name1  path of the image to process
 *   n                 number of timed calls per processor
 *
 * Returns 1 when the test ran, 0 when the image could not be loaded.
 * NOTE(review): 0 is assumed to be the failure value the caller expects.
 */
Int C6Accel_test_CornerHarris(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
    IplImage *inputImg1, *outputImg_arm, *outputImg_dsp, *corner8;
    struct timeval startTime, endTime;
    int t_overhead, t_algo, i;
    float t_avg;
    double minVal = 0.0, maxVal = 0.0;
    double scale, shift, range;
    const double min = 0, max = 255;    /* target range of the 8-bit image */

    // 1. Load input
    inputImg1 = cvLoadImage(input_file_name1, 0);
    if (inputImg1 == NULL) {
        printf("Failed to load input image %s\n", input_file_name1);
        return 0;
    }

    // 2. Allocate 32-bit float responses and the 8-bit image used for saving
    outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_32F, 1);
    outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_32F, 1);
    corner8 = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_8U, 1);

    // Cost of a back-to-back gettimeofday pair, subtracted from each measurement
    gettimeofday(&startTime, NULL);
    gettimeofday(&endTime, NULL);
    t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

    // 3. Benchmark the ARM call
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++) {
        cvCornerHarris(inputImg1, outputImg_arm, 3, 3, 0.04);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called ARM cvCornerHarris function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // Rescale the ARM response to [min, max]; a constant response has no
    // range to stretch, so it is mapped to min instead of dividing by zero.
    cvMinMaxLoc(outputImg_arm, &minVal, &maxVal, NULL, NULL, 0);
    range = maxVal - minVal;
    scale = (range != 0.0) ? (max - min) / range : 0.0;
    shift = -minVal * scale + min;
    cvConvertScale(outputImg_arm, corner8, scale, shift);
    cvSaveImage("./output_arm.png", corner8, 0);

    // 4. Benchmark the DSP call
    gettimeofday(&startTime, NULL);
    for (i = 0; i < n; i++) {
        C6accel_cvCornerHarris(hC6accel, inputImg1, outputImg_dsp, 3, 3, 0.04);
    }
    gettimeofday(&endTime, NULL);
    t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
    t_avg = (float)t_algo / (float)n;
    printf("Called DSP cvCornerHarris function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    // Rescale the DSP response the same way (corner8 is reused)
    cvMinMaxLoc(outputImg_dsp, &minVal, &maxVal, NULL, NULL, 0);
    range = maxVal - minVal;
    scale = (range != 0.0) ? (max - min) / range : 0.0;
    shift = -minVal * scale + min;
    cvConvertScale(outputImg_dsp, corner8, scale, shift);

    // 5. Compare the raw float outputs
    printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

    // 6. Save the DSP output
    cvSaveImage("./output_dsp.png", corner8, 0);
    printf("Test for cornerharris edge detection done\n");

    // 7. Free memory allocated to images
    cvReleaseImage(&inputImg1);
    cvReleaseImage(&corner8);
    cvReleaseImage(&outputImg_arm);
    cvReleaseImage(&outputImg_dsp);

    return 1;
}



/*
 * Benchmarks corner eigen-analysis on the ARM against the C6Accel (DSP) port.
 *
 * Two kernel pairs are each timed over n back-to-back calls:
 *   - cvCornerEigenValsAndVecs vs C6accel_cvCornerEigenValsAndVecs
 *   - cvCornerMinEigenVal      vs C6accel_cvCornerMinEigenVal
 * After each pair the L2 norm between the ARM and DSP results is printed.
 * The min-eigenvalue images are saved to ./output_arm.png and
 * ./output_dsp.png.
 *
 * hC6accel         - C6Accel handle handed to the DSP-side calls
 * input_file_name1 - path of the image to load (load flag 1)
 * n                - number of calls in every timing loop
 *
 * Returns 1 when the test ran to completion, 0 if the input image could not
 * be loaded.
 */
Int C6Accel_test_CornerEigenValsAndVecs(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
        IplImage *inputImg1, *outputImg_arm, *outputImg_dsp, *g_gray, *eig_val_arm, *eig_val_dsp;
        struct timeval startTime, endTime;
        int t_overhead, t_algo, i;
        float t_avg;

        /* 1. Load the input; cvLoadImage yields NULL when the file cannot be read */
        inputImg1 = cvLoadImage(input_file_name1, 1);
        if (inputImg1 == NULL) {
                printf("Failed to load input image %s\n", input_file_name1);
                return 0;
        }

        /*
         * Measure the cost of the gettimeofday() pair itself so that it can be
         * subtracted from every measurement below. (t_overhead used to be read
         * without ever being written.)
         */
        gettimeofday(&startTime, NULL);
        gettimeofday(&endTime, NULL);
        t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

        /*
         * 2. Allocate the working images.
         * The eigen value/vector outputs are 6 times as wide as the input so
         * that six floats can be stored per input pixel.
         */
        outputImg_arm = cvCreateImage(cvSize(inputImg1->width * 6, inputImg1->height), IPL_DEPTH_32F, 1);
        outputImg_dsp = cvCreateImage(cvSize(inputImg1->width * 6, inputImg1->height), IPL_DEPTH_32F, 1);
        g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, 1);
        eig_val_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_32F, 1);
        eig_val_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_32F, 1);

        /* Single-channel copy of the input that all four kernels operate on */
        cvvConvertImage(inputImg1, g_gray, 0);

        /*
         * 3. cvCornerEigenValsAndVecs on the ARM.
         * The kernel finds eigenvectors and eigenvalues of the per-pixel matrix
         * M and stores them into the destination image in the form
         * (lambda1, lambda2, x1, y1, x2, y2), where
         *   lambda1, lambda2 - eigenvalues of M; not sorted
         *   (x1, y1)         - eigenvector corresponding to lambda1
         *   (x2, y2)         - eigenvector corresponding to lambda2
         */
        gettimeofday(&startTime, NULL);
        for (i = 0; i < n; i++) {
                cvCornerEigenValsAndVecs(g_gray, outputImg_arm, 5, 5);
        }
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)n;
        printf("Called ARM cvCornerEigenValsAndVecs function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

        /* 4. Same kernel on the DSP */
        gettimeofday(&startTime, NULL);
        for (i = 0; i < n; i++) {
                C6accel_cvCornerEigenValsAndVecs(hC6accel, g_gray, outputImg_dsp, 5, 5);
        }
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)n;
        printf("Called DSP cvCornerEigenValsAndVecs function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

        /* 5. Compare outputs */
        printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

        /* 6. cvCornerMinEigenVal on the ARM */
        gettimeofday(&startTime, NULL);
        for (i = 0; i < n; i++) {
                cvCornerMinEigenVal(g_gray, eig_val_arm, 5, 5);
        }
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)n;
        printf("Called ARM cvCornerMinEigenVal function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

        cvSaveImage("./output_arm.png", eig_val_arm, 0);

        /* 7. Same kernel on the DSP */
        gettimeofday(&startTime, NULL);
        for (i = 0; i < n; i++) {
                C6accel_cvCornerMinEigenVal(hC6accel, g_gray, eig_val_dsp, 5, 5);
        }
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)n;
        printf("Called DSP cvCornerMinEigenVal function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

        cvSaveImage("./output_dsp.png", eig_val_dsp, 0);

        /* 8. Compare outputs */
        printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(eig_val_arm, eig_val_dsp, CV_L2, NULL));

        printf("Test for CornerEigenValsAndVecs and CornerMinEigVal completed successfully\n");

        /* 9. Free memory allocated to images */
        cvReleaseImage(&inputImg1);
        cvReleaseImage(&g_gray);
        cvReleaseImage(&eig_val_arm);
        cvReleaseImage(&eig_val_dsp);
        cvReleaseImage(&outputImg_arm);
        cvReleaseImage(&outputImg_dsp);

        return 1;
}





/*
 * Benchmarks an 11x11 Gaussian cvSmooth on the ARM against C6accel_cvSmooth
 * on the DSP.
 *
 * Each variant is called n times in a row and the average time per call is
 * printed. The L2 norm between both results is then printed and the two
 * output images are saved to ./output_arm.png and ./output_dsp.png.
 *
 * hC6accel         - C6Accel handle handed to the DSP-side call
 * input_file_name1 - path of the image to load (loaded with CV_LOAD_IMAGE_COLOR)
 * n                - number of calls in every timing loop
 *
 * Returns 1 when the test ran to completion, 0 if the input image could not
 * be loaded.
 */
Int C6Accel_test_Smooth(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
        IplImage *inputImg1, *outputImg_arm, *outputImg_dsp;
        struct timeval startTime, endTime;
        int t_overhead, t_algo, i;
        float t_avg;

        /* 1. Load the input; cvLoadImage yields NULL when the file cannot be read */
        inputImg1 = cvLoadImage(input_file_name1, CV_LOAD_IMAGE_COLOR);
        if (inputImg1 == NULL) {
                printf("Failed to load input image %s\n", input_file_name1);
                return 0;
        }

        /*
         * Measure the cost of the gettimeofday() pair itself so that it can be
         * subtracted from every measurement below. (t_overhead used to be read
         * without ever being written.)
         */
        gettimeofday(&startTime, NULL);
        gettimeofday(&endTime, NULL);
        t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

        /* 2. Allocate output images (same size, depth and channels as the input) */
        outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);
        outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels);

        /* 3. ARM run */
        gettimeofday(&startTime, NULL);
        for (i = 0; i < n; i++) {
                cvSmooth(inputImg1, outputImg_arm, CV_GAUSSIAN, 11, 11, 0.0, 0.0);
        }
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)n;
        printf("Called ARM cvSmooth function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

        /* 4. DSP run */
        gettimeofday(&startTime, NULL);
        for (i = 0; i < n; i++) {
                C6accel_cvSmooth(hC6accel, inputImg1, outputImg_dsp, CV_GAUSSIAN, 11, 11, (double)0.0, (double)0.0);
        }
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)n;
        printf("Called DSP cvSmooth function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

        /* 5. Compare outputs */
        printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

        /* 6. Save both results */
        cvSaveImage("./output_arm.png", outputImg_arm, 0);
        cvSaveImage("./output_dsp.png", outputImg_dsp, 0);

        printf("Test for Smooth/Bluring operation done\n");

        /* 7. Free memory allocated to images */
        cvReleaseImage(&inputImg1);
        cvReleaseImage(&outputImg_arm);
        cvReleaseImage(&outputImg_dsp);

        return 1;
}





/*
 * Benchmarks cvAdaptiveThreshold on the ARM against
 * C6accel_cvAdaptiveThreshold on the DSP.
 *
 * The input is converted to a single-channel grey image, each variant is
 * called n times in a row with identical parameters (max value 125, mean
 * adaptive method, binary threshold, block size 7, constant 10) and the
 * average time per call is printed. The L2 norm between both results is then
 * printed and the two outputs are saved to ./output_arm.png and
 * ./output_dsp.png.
 *
 * hC6accel         - C6Accel handle handed to the DSP-side call
 * input_file_name1 - path of the image to load (loaded with CV_LOAD_IMAGE_COLOR)
 * n                - number of calls in every timing loop
 *
 * Returns 1 when the test ran to completion, 0 if the input image could not
 * be loaded.
 */
Int C6Accel_test_AdaptiveThreshold(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
        IplImage *inputImg1, *outputImg_arm, *outputImg_dsp, *g_gray;
        struct timeval startTime, endTime;
        int t_overhead, t_algo, i;
        float t_avg;

        /* 1. Load the input; cvLoadImage yields NULL when the file cannot be read */
        inputImg1 = cvLoadImage(input_file_name1, CV_LOAD_IMAGE_COLOR);
        if (inputImg1 == NULL) {
                printf("Failed to load input image %s\n", input_file_name1);
                return 0;
        }

        /*
         * Measure the cost of the gettimeofday() pair itself so that it can be
         * subtracted from every measurement below. (t_overhead used to be read
         * without ever being written.)
         */
        gettimeofday(&startTime, NULL);
        gettimeofday(&endTime, NULL);
        t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

        /* 2. Allocate 8-bit single-channel images for the grey input and both outputs */
        outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_8U, 1);
        outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_8U, 1);
        g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_8U, 1);

        cvCvtColor(inputImg1, g_gray, CV_BGR2GRAY);

        /* 3. ARM run */
        gettimeofday(&startTime, NULL);
        for (i = 0; i < n; i++) {
                cvAdaptiveThreshold(g_gray, outputImg_arm, (double)125.0, CV_ADAPTIVE_THRESH_MEAN_C, CV_THRESH_BINARY, 7, (double)10.0);
        }
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)n;
        printf("Called ARM Adaptive threshold function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

        /* Rebuild the grey input before handing it to the DSP */
        cvCvtColor(inputImg1, g_gray, CV_BGR2GRAY);

        /* 4. DSP run */
        gettimeofday(&startTime, NULL);
        for (i = 0; i < n; i++) {
                C6accel_cvAdaptiveThreshold(hC6accel, g_gray, outputImg_dsp, (double)125.0, CV_ADAPTIVE_THRESH_MEAN_C, CV_THRESH_BINARY, 7, (double)10.0);
        }
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)n;
        printf("Called DSP Adaptive threshold function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

        /* 5. Compare outputs */
        printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL));

        /* 6. Save both results */
        cvSaveImage("./output_arm.png", outputImg_arm, 0);
        cvSaveImage("./output_dsp.png", outputImg_dsp, 0);

        printf("Test for Adaptive thresholding operation done\n");

        /* 7. Free memory allocated to images */
        cvReleaseImage(&inputImg1);
        cvReleaseImage(&g_gray);
        cvReleaseImage(&outputImg_arm);
        cvReleaseImage(&outputImg_dsp);

        return 1;
}





/*
 * Benchmarks the probabilistic Hough line transform on the ARM
 * (cvHoughLines2) against the DSP (C6accel_cvHoughLines2).
 *
 * The input is loaded as a grey image and run through cvCanny; both variants
 * then process the same edge map with identical parameters. The detected
 * segments are drawn in red onto a colour copy of the edge map, the copies
 * are saved to ./output_arm.png and ./output_dsp.png, and the L2 norm between
 * the two annotated images is printed.
 *
 * hC6accel         - C6Accel handle handed to the DSP-side call
 * input_file_name1 - path of the image to load (load flag 0)
 * n                - kept for interface compatibility; each transform is
 *                    executed exactly once, so timings are reported for a
 *                    single call
 *
 * Returns 1 when the test ran to completion, 0 if the input image could not
 * be loaded.
 */
Int C6Accel_test_Houghlines2D(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
 IplImage *src, *dst;
 IplImage *color_dst_arm, *color_dst_dsp;
 CvMemStorage *storage_dsp, *storage_arm;
 CvSeq *lines_dsp = NULL, *lines_arm = NULL;
 int t_overhead, t_algo, i;
 struct timeval startTime, endTime;
 float t_avg;
 /* Each transform below runs once; dividing by n used to understate the time */
 const int runs = 1;

 (void)n;

 /* cvLoadImage yields NULL when the file cannot be read */
 src = cvLoadImage(input_file_name1, 0);
 if (src == NULL) {
    printf("Failed to load input image %s\n", input_file_name1);
    return 0;
 }

 /*
  * Measure the cost of the gettimeofday() pair itself so that it can be
  * subtracted from every measurement below. (t_overhead used to be read
  * without ever being written.)
  */
 gettimeofday(&startTime, NULL);
 gettimeofday(&endTime, NULL);
 t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

 dst = cvCreateImage(cvGetSize(src), 8, 1);
 color_dst_arm = cvCreateImage(cvGetSize(src), 8, 3);
 color_dst_dsp = cvCreateImage(cvGetSize(src), 8, 3);

 storage_arm = cvCreateMemStorage(0);
 /* NOTE(review): presumably pre-allocates the first storage block before the timed call; confirm */
 (void)cvMemStorageAlloc(storage_arm, 64);

 /* Edge map shared by both variants, plus a colour copy of it for each to draw on */
 cvCanny(src, dst, 50, 200, 3);
 cvCvtColor(dst, color_dst_arm, CV_GRAY2BGR);
 cvCvtColor(dst, color_dst_dsp, CV_GRAY2BGR);

 /* ARM processing */
 gettimeofday(&startTime, NULL);
 lines_arm = cvHoughLines2(dst,
                           storage_arm,
                           CV_HOUGH_PROBABILISTIC,
                           (double)1.0,
                           (double)(CV_PI/180),
                           80,
                           (double)30.0,
                           (double)10.0);
 gettimeofday(&endTime, NULL);

 t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
 t_avg = (float)t_algo / (float)runs;
 printf("Called ARM HoughLines2D function %i times (average time: %f ms)\n", runs, t_avg / 1000.0);

 /* Each sequence element is a pair of end points describing one segment */
 if (lines_arm != NULL) {
    for (i = 0; i < lines_arm->total; i++) {
       CvPoint *segment = (CvPoint *)cvGetSeqElem(lines_arm, i);
       cvLine(color_dst_arm, segment[0], segment[1], CV_RGB(255,0,0), 1, 8, 0);
    }
 }
 cvSaveImage("./output_arm.png", color_dst_arm, 0);

 /* DSP processing */
 storage_dsp = cvCreateMemStorage(0);
 (void)cvMemStorageAlloc(storage_dsp, 64);

 gettimeofday(&startTime, NULL);
 C6accel_cvHoughLines2(hC6accel, dst,
                       storage_dsp,
                       CV_HOUGH_PROBABILISTIC,
                       (double)1.0,
                       (double)(CV_PI/180),
                       80,
                       (double)30.0,
                       (double)10.0, &lines_dsp);
 gettimeofday(&endTime, NULL);

 t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
 t_avg = (float)t_algo / (float)runs;
 printf("Called DSP HoughLines2D function %i times (average time: %f ms)\n", runs, t_avg / 1000.0);

 /* lines_dsp stays NULL if the DSP call did not fill it in; skip drawing then */
 if (lines_dsp != NULL) {
    for (i = 0; i < lines_dsp->total; i++) {
       CvPoint *segment = (CvPoint *)cvGetSeqElem(lines_dsp, i);
       cvLine(color_dst_dsp, segment[0], segment[1], CV_RGB(255,0,0), 1, 8, 0);
    }
 }

 /* Compare outputs */
 printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(color_dst_arm, color_dst_dsp, CV_L2, NULL));
 cvSaveImage("./output_dsp.png", color_dst_dsp, 0);

 printf("Releasing allocated buffers\n");

 /* Release memory allocated for the test */
 cvReleaseImage(&color_dst_arm);
 cvReleaseImage(&color_dst_dsp);
 cvReleaseImage(&src);
 cvReleaseImage(&dst);
 cvReleaseMemStorage(&storage_arm);
 cvReleaseMemStorage(&storage_dsp);

 return 1;
}







/*Int C6Accel_test_opticalflowHS(C6accel_Handle hC6accel, char *input_file_name1, int n)

{

 int step, x,y;

 float *px, *py;

 int t_overhead, t_algo, i;

 struct timeval startTime, endTime;

 float t_avg;

// Initialize, load two images from the file system, and

    // allocate the images and other structures we will need for

    // results.



    // exit if no input images

    IplImage *imgA = 0, *imgB = 0;

    imgA = cvLoadImage("opencv_images/OpticalFlow0.jpg",0);

    imgB = cvLoadImage("opencv_images/OpticalFlow1.jpg",0);

    if(!(imgA)||!(imgB)){ printf("One of OpticalFlow0.jpg and/or OpticalFlow1.jpg didn't load\n"); return -1;}

    printf("1\n");

    IplImage* velx = cvCreateImage(cvGetSize(imgA),IPL_DEPTH_32F,1);

    IplImage* vely = cvCreateImage(cvGetSize(imgA),IPL_DEPTH_32F,1);

    IplImage* imgC = cvCreateImage(cvGetSize(imgA),IPL_DEPTH_8U,3);

    imgC =  cvLoadImage("opencv_images/OpticalFlow1.jpg",1);

    printf("2\n");

    cvSaveImage( "./OpticalFlow0.png",imgA, 0 );

    cvSaveImage( "./OpticalFlow1.png",imgB, 0 );





    gettimeofday(&startTime, NULL);

    // Call the actual Horn and Schunck algorithm

    //

    cvCalcOpticalFlowHS( 

        imgA, 

        imgB, 

        0,

        velx,

        vely,

        .10,

        cvTermCriteria( 

            CV_TERMCRIT_ITER | CV_TERMCRIT_EPS,

            imgA->width,

            1e-6

        )

    );

    gettimeofday(&endTime, NULL);



 t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

 t_avg = (float)t_algo / (float)n;

 printf("Called Optical flow function %i times (average time: %f ms)\n", n, t_avg / 1000.0);



    // Now make some image of what we are looking at:

    //

    gettimeofday(&startTime, NULL);

    step = 4;

    for(  y=0; y<imgC->height; y += step ) {

         px = (float*) ( velx->imageData + y * velx->widthStep );

         py = (float*) ( vely->imageData + y * vely->widthStep );

         for(  x=0; x<imgC->width; x += step ) {

            if( px[x]>1 && py[x]>1 ) {

                cvCircle(

                    imgC,

                    cvPoint( x, y ),

                    2,

                    CVX_GRAY50,

                    -1,8,0

                );

                

                cvLine(

                    imgC,

                    cvPoint( x, y ),

                    cvPoint( x+px[x]/2, y+py[x]/2 ),

                    CV_RGB(255,0,0),

                    1,8,

                    0

                );

              

            }

        }

    }



    gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;

        t_avg = (float)t_algo / (float)n;



        printf("Called ARM cvSmooth function %i times (average time: %f ms)\n", n, t_avg / 1000.0);

    

    // show tracking

    cvSaveImage( "./Flow Results.png",imgC ,0);

    

    // release memory

    cvReleaseImage( &imgA );

    cvReleaseImage( &imgB );

    cvReleaseImage( &imgC );

    cvReleaseImage( &velx);

    cvReleaseImage( &vely);



    return 0;

}*/





/*
 * Benchmarks affine warping and rotation on the ARM against the C6Accel (DSP)
 * port. Four kernel pairs are timed, one call each:
 *   - cvGetAffineTransform vs C6accel_cvGetAffineTransform
 *   - cvWarpAffine         vs C6accel_cvWarpAffine (affine matrices)
 *   - cv2DRotationMatrix   vs C6accel_cv2DRotationMatrix
 *   - cvWarpAffine         vs C6accel_cvWarpAffine (rotation matrix)
 * After each pair the L2 norm between the ARM and DSP results is printed.
 * The warped images go to ./output_{arm,dsp}_affine.png and the rotated ones
 * to ./output_{arm,dsp}_rotated.png.
 *
 * hC6accel         - C6Accel handle handed to the DSP-side calls
 * input_file_name1 - path of the image to load (load flag 1)
 * n                - kept for interface compatibility; every kernel is
 *                    executed exactly once, so timings are reported for a
 *                    single call
 *
 * Returns 0 when the test ran to completion, -1 if the input image could not
 * be loaded.
 */
Int C6Accel_test_rotation(C6accel_Handle hC6accel, char *input_file_name1, int n)
{
        struct timeval startTime, endTime;
        int t_overhead, t_algo;
        float t_avg;
        /* Each kernel below runs once; dividing by n used to understate the time */
        const int runs = 1;

        CvPoint2D32f srcTri[3], dstTri[3];
        CvPoint2D32f center;
        CvMat *rot_mat, *rot_mat_dsp, *warp_mat, *warp_mat_dsp;
        IplImage *src, *dst_arm, *dst_dsp;

        /* Rotation angle (as passed to cv2DRotationMatrix) and scale factor */
        double angle = 45.0;
        double scale = 1.0;

        (void)n;

        /*
         * Load the image three times: once as the source and twice to obtain
         * destination buffers of identical geometry. cvLoadImage yields NULL
         * when the file cannot be read.
         */
        src = cvLoadImage(input_file_name1, 1);
        dst_arm = cvLoadImage(input_file_name1, 1);
        dst_dsp = cvLoadImage(input_file_name1, 1);
        if (src == NULL || dst_arm == NULL || dst_dsp == NULL) {
                printf("Failed to load input image %s\n", input_file_name1);
                /* cvReleaseImage tolerates a pointer that is still NULL */
                cvReleaseImage(&src);
                cvReleaseImage(&dst_arm);
                cvReleaseImage(&dst_dsp);
                return -1;
        }

        dst_arm->origin = src->origin;
        dst_dsp->origin = src->origin;
        cvZero(dst_arm);
        cvZero(dst_dsp);

        /* 2x3 single-precision matrices for the ARM and DSP transforms */
        rot_mat = cvCreateMat(2, 3, CV_32FC1);
        rot_mat_dsp = cvCreateMat(2, 3, CV_32FC1);
        warp_mat = cvCreateMat(2, 3, CV_32FC1);
        warp_mat_dsp = cvCreateMat(2, 3, CV_32FC1);

        /*
         * Measure the cost of the gettimeofday() pair itself so that it can be
         * subtracted from every measurement below. (t_overhead used to be read
         * without ever being written.)
         */
        gettimeofday(&startTime, NULL);
        gettimeofday(&endTime, NULL);
        t_overhead = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec;

        /* Source triangle: top-left, top-right and bottom-left image corners */
        srcTri[0].x = 0;
        srcTri[0].y = 0;
        srcTri[1].x = src->width - 1;
        srcTri[1].y = 0;
        srcTri[2].x = 0;
        srcTri[2].y = src->height - 1;

        /* Destination triangle the three corners are mapped onto */
        dstTri[0].x = src->width * 0.0;
        dstTri[0].y = src->height * 0.25;
        dstTri[1].x = src->width * 0.90;
        dstTri[1].y = src->height * 0.15;
        dstTri[2].x = src->width * 0.10;
        dstTri[2].y = src->height * 0.75;

        /* Compute warp matrix: ARM */
        gettimeofday(&startTime, NULL);
        cvGetAffineTransform(srcTri, dstTri, warp_mat);
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)runs;
        printf("Called ARM get Affine transform matrix %i times (average time: %f ms)\n", runs, t_avg / 1000.0);

        /* Compute warp matrix: DSP */
        gettimeofday(&startTime, NULL);
        C6accel_cvGetAffineTransform(hC6accel, srcTri, dstTri, warp_mat_dsp);
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)runs;
        printf("Called DSP get Affine transform matrix %i times (average time: %f ms)\n", runs, t_avg / 1000.0);

        /* Compare outputs */
        printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(warp_mat, warp_mat_dsp, CV_L2, NULL));

        /* Affine warp: ARM */
        gettimeofday(&startTime, NULL);
        cvWarpAffine(src, dst_arm, warp_mat, CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS, cvScalarAll(0));
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)runs;
        printf("Called ARM Affine transform function %i times (average time: %f ms)\n", runs, t_avg / 1000.0);

        /* Affine warp: DSP */
        gettimeofday(&startTime, NULL);
        C6accel_cvWarpAffine(hC6accel, src, dst_dsp, warp_mat_dsp, CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS, cvScalarAll(0));
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)runs;
        printf("Called DSP Affine transform function %i times (average time: %f ms)\n", runs, t_avg / 1000.0);

        /* Compare outputs */
        printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(dst_arm, dst_dsp, CV_L2, NULL));

        cvSaveImage("./output_arm_affine.png", dst_arm, 0);
        cvSaveImage("./output_dsp_affine.png", dst_dsp, 0);

        /*
         * The rotation stage takes the DSP-warped image as its input. (The
         * original also copied dst_arm into src first, but that copy was
         * overwritten immediately by this one.)
         */
        cvCopy(dst_dsp, src, NULL);

        /* Compute rotation matrix about the image centre: ARM */
        center = cvPoint2D32f(src->width/2, src->height/2);

        gettimeofday(&startTime, NULL);
        cv2DRotationMatrix(center, angle, scale, rot_mat);
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)runs;
        printf("Called ARM 2D Rotation Matrix function %i times (average time: %f ms)\n", runs, t_avg / 1000.0);

        /* Compute rotation matrix: DSP */
        gettimeofday(&startTime, NULL);
        C6accel_cv2DRotationMatrix(hC6accel, center, angle, scale, rot_mat_dsp);
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)runs;
        printf("Called DSP 2D Rotation Matrix function %i times (average time: %f ms)\n", runs, t_avg / 1000.0);

        printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(rot_mat, rot_mat_dsp, CV_L2, NULL));

        /* Do the rotation: ARM */
        gettimeofday(&startTime, NULL);
        cvWarpAffine(src, dst_arm, rot_mat, CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS, cvScalarAll(0));
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)runs;
        printf("Called ARM Rotation using Affine function %i times (average time: %f ms)\n", runs, t_avg / 1000.0);

        /*
         * Do the rotation: DSP.
         * NOTE(review): this passes the ARM-computed rot_mat, whereas the
         * affine stage above passes warp_mat_dsp; confirm whether rot_mat_dsp
         * was intended here.
         */
        gettimeofday(&startTime, NULL);
        C6accel_cvWarpAffine(hC6accel, src, dst_dsp, rot_mat, CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS, cvScalarAll(0));
        gettimeofday(&endTime, NULL);

        t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead;
        t_avg = (float)t_algo / (float)runs;
        printf("Called DSP Rotation using Affine function %i times (average time: %f ms)\n", runs, t_avg / 1000.0);

        /* Compare outputs */
        printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(dst_arm, dst_dsp, CV_L2, NULL));

        cvSaveImage("./output_arm_rotated.png", dst_arm, 0);
        cvSaveImage("./output_dsp_rotated.png", dst_dsp, 0);

        /* Release everything allocated for the test */
        cvReleaseImage(&dst_arm);
        cvReleaseImage(&dst_dsp);
        cvReleaseImage(&src);
        cvReleaseMat(&rot_mat);
        cvReleaseMat(&warp_mat);
        cvReleaseMat(&rot_mat_dsp);
        cvReleaseMat(&warp_mat_dsp);

        return 0;
}