/*******************************************************************************
* Copyright (C) 2022 Intel Corporation
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <omp.h>
#include "mkl.h"
#include "mkl_omp_offload.h"


/*
 * Solves a batch of two 5x5 problems with dgels_batch_strided, invoked under
 * "#pragma omp dispatch" inside an OpenMP target data region, then compares
 * the values left in B against the expected solutions stored in X.
 *
 * Returns 0 when every problem reports info == 0 and every solution entry is
 * within 1e-8 of its expected value; returns 1 on allocation failure, on a
 * non-zero info, or on a result mismatch.
 */
int main(void)
{
    char N = 'N';
    const MKL_INT m = 5, n = 5, nrhs = 1, lda = 5, stride_a = n*lda, ldb = 5, stride_b = nrhs*ldb, batch_size = 2;

    /* batch_size matrices stored back to back, stride_a elements apart. */
    double A[] = {
        /* problem 0 */
        1.0,  0.0,  0.0,  0.0,  0.0,
        1.0,  0.2, -0.4, -0.4, -0.8,
        1.0,  0.6, -0.2,  0.4, -1.2,
        1.0,  1.0, -1.0,  0.6, -0.8,
        1.0,  1.8, -0.6,  0.2, -0.6,
        /* problem 1 */
        0.2, -0.4, -0.4, -0.8,  0.0,
        0.4,  0.2,  0.8, -0.4,  0.0,
        0.4, -0.8,  0.2,  0.4,  0.0,
        0.8,  0.4, -0.4,  0.2,  0.0,
        0.0,  0.0,  0.0,  0.0,  1.0
    };

    /* Right-hand sides, stride_b elements apart; read back as the results. */
    double B[] = {
        5.0,  3.6, -2.2,  0.8, -3.4,
        1.8, -0.6,  0.2, -0.6,  1.0
    };

    /* Expected solutions: n entries per problem, one row per problem. */
    double X[] = {
        1.0,  1.0,  1.0,  1.0,  1.0,
        1.0,  1.0,  1.0,  1.0,  1.0
    };

    MKL_INT *info = malloc(batch_size * sizeof *info);
    if (info == NULL) {
        printf("ERROR: failed to allocate memory for info!\n");
        return 1;
    }
    double *A_ptr = A, *B_ptr = B;

    /* Map the inputs/outputs for the duration of the dispatched call. */
    #pragma omp target data map(A_ptr[0:stride_a*batch_size], B_ptr[0:stride_b*batch_size], info[0:batch_size])
    {
        #pragma omp dispatch
        dgels_batch_strided(&N, &m, &n, &nrhs, A_ptr, &lda, &stride_a, B_ptr, &ldb, &stride_b, &batch_size, info);
    }

    /* Report every problem whose per-problem status is non-zero. */
    int num_failed = 0;
    for (int i = 0; i < batch_size; i++) {
        if (info[i] != 0) {
            printf("ERROR: Calculations for problem %d failed with info = %d!\n", i, (int)info[i]);
            num_failed++;
        }
    }

    /* info is not needed past this point; release it before any return. */
    free(info);
    info = NULL;

    if (num_failed > 0) {
        return 1;
    }

    /* Print each solution and count entries that are NaN or off by > 1e-8. */
    int num_mismatches = 0;
    printf("Results:\n");
    for (int i = 0; i < batch_size; i++) {
        for (int j = 0; j < n; j++) {
            double res = B[i*stride_b + j];
            double expected = X[i*n + j];
            printf("%6.2f ", res);
            if (isnan(res) || fabs(res - expected) > 1e-8) {
                num_mismatches++;
            }
        }
        printf("\n");
    }

    if (num_mismatches > 0) {
        printf("ERROR: results mismatch!\n");
        printf("Expected:\n");
        for (int i = 0; i < batch_size; i++) {
            for (int j = 0; j < n; j++) {
                printf("%6.2f ", X[i*n + j]);
            }
            printf("\n");
        }
        return 1;
    }

    printf("Calculations successfully finished\n");
    return 0;
}
