/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, I1.HR                           *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Feb 92                                                   *
*  Last Update : Aug 92                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : replicate1.c                                             *
*                                                                         *
*  Function    : Operations for replicating sections of distr. arrays     *
*                                                                         *
*  Export : FORTRAN Interface                                             *
*                                                                         *
**************************************************************************/

# undef DEBUG

# include "system.h"

/*
 * dalib_replicate1__ : replicate the section a(x1:y1) into ra
 *
 *   ra    destination buffer; the pieces contributed by the individual
 *         owner processes are stored back to back, starting at ra
 *   a     source array handed to dalib_copy_section1 on the owning process
 *   size  element size (presumably in bytes, as it scales the byte
 *         pointer into ra -- confirm against callers)
 *   N1    passed on to dalib_where / dalib_local_extensions /
 *         dalib_setup_section1 (presumably the extent of the
 *         distributed dimension -- confirm)
 *   x1,y1 lower and upper bound of the section
 *
 * All arguments are pointers because this is the FORTRAN interface.
 */
void dalib_replicate1__ (ra, a, size, N1, x1, y1)
unsigned char *ra, *a;
int *size;
int *N1, *x1, *y1;

{
    int self = pcb.i;                 /* id of the calling process       */
    int first_owner, last_owner;      /* processes owning x1 resp. y1    */
    int owner;                        /* process currently broadcasting  */
    int lo, hi;                       /* owner's part of x1:y1           */
    int chunk_bytes;                  /* length of the owner's piece     */
    unsigned char *dest = ra;         /* where the next piece is stored  */

#ifdef DEBUG
    printf ("Process %d calls replicate1 %d : %d\n", self, *x1, *y1);
#endif

    /* first_owner : last_owner = processes holding elements of x1:y1 */

    first_owner = dalib_where (*N1, *x1);
    last_owner  = dalib_where (*N1, *y1);

    owner = first_owner;

    while (owner <= last_owner)
      {
        /* local range of process 'owner', clipped to the section */

        dalib_local_extensions (owner, *N1, &lo, &hi);

        if (*x1 > lo) lo = *x1;
        if (*y1 < hi) hi = *y1;

        /* a(lo:hi) is the piece that comes from process 'owner' */

        chunk_bytes = *size * (hi - lo + 1);

        /* only the owner itself fills its piece of ra from a */

        if (self == owner)
          {
            dalib_setup_section1 (*size, *N1, lo, hi);
            dalib_copy_section1  (dest, a);
          }

        /* the piece is broadcast with 'owner' as the source process */

        process_broadcast (dest, chunk_bytes, owner);

        dest  += chunk_bytes;
        owner += 1;
      }

} /* dalib_replicate1 */

/*
 * dalib_replicate2__ : replicate the section a(x1:y1,x2:y2) into ra
 *
 *   ra    destination buffer; the pieces contributed by the individual
 *         owner processes are stored back to back, starting at ra
 *   a     source array handed to dalib_copy_section2 on the owning process
 *   size  element size (presumably in bytes, as it scales the byte
 *         pointer into ra -- confirm against callers)
 *   N1    passed on to dalib_setup_section2 together with x1:y1
 *   N2    passed on to dalib_where / dalib_local_extensions; the owner
 *         processes are determined from the second dimension (x2:y2)
 *   x1,y1 bounds of the section in the first dimension
 *   x2,y2 bounds of the section in the second dimension
 *
 * All arguments are pointers because this is the FORTRAN interface.
 */
void dalib_replicate2__ (ra, a, size, N1, x1, y1, N2, x2, y2)
unsigned char *a, *ra;
int *size;
int *N1, *x1, *y1, *N2, *x2, *y2;

{   /* replicate a(x1:y1,x2:y2) to ra */

    int pid_low, pid_up, send_pid;
    int my_pid;

    int send_size;
    int low, up;

    unsigned char *ra_ptr;

    my_pid = pcb.i;

#ifdef DEBUG
    /* one "%d : %d" pair per dimension, so that both ranges are shown */
    printf ("Process %d calls replicate2 %d : %d, %d : %d\n",
             my_pid, *x1, *y1, *x2, *y2);
#endif

    /* pid_low : pid_up =  processors that own elements of this range */

    pid_low = dalib_where (*N2, *x2);
    pid_up  = dalib_where (*N2, *y2);

    ra_ptr = ra;

    for (send_pid = pid_low; send_pid <= pid_up; send_pid++)

      { /* get local range of process send_pid, clipped to x2:y2 */

        dalib_local_extensions (send_pid, *N2, &low, &up);
        if (low < *x2) low = *x2;
        if (up  > *y2) up  = *y2;

        /* replicate a(x1:y1,low:up) from processor send_pid */

        send_size = (up - low + 1) * (*y1 - *x1 + 1) * *size;

        /* if I am process send_pid,  set the elements in ra */

        if (send_pid == my_pid)
          {  dalib_setup_section2 (*size, *N1, *x1, *y1, *N2, low, up);
             dalib_copy_section2  (ra_ptr, a);
          }

        /* broadcast of the section of ra   */

        process_broadcast (ra_ptr, send_size, send_pid);

        /* the next owner's piece follows directly behind this one */

        ra_ptr += send_size;
      }
} /* dalib_replicate2 */

/*
 * dalib_replicate3__ : replicate the section a(x1:y1,x2:y2,x3:y3) into ra
 *
 *   ra    destination buffer; the pieces contributed by the individual
 *         owner processes are stored back to back, starting at ra
 *   a     source array handed to dalib_copy_section3 on the owning process
 *   size  element size (presumably in bytes, as it scales the byte
 *         pointer into ra -- confirm against callers)
 *   N1,N2 passed on to dalib_setup_section3 together with x1:y1, x2:y2
 *   N3    passed on to dalib_where / dalib_local_extensions; the owner
 *         processes are determined from the third dimension (x3:y3)
 *   xk,yk bounds of the section in dimension k
 *
 * All arguments are pointers because this is the FORTRAN interface.
 */
void dalib_replicate3__ (ra, a, size, N1, x1, y1, N2, x2, y2, N3, x3, y3)
unsigned char *ra, *a;
int *size;
int *N1, *x1, *y1, *N2, *x2, *y2, *N3, *x3, *y3;

{
    int self = pcb.i;                 /* id of the calling process       */
    int first_owner, last_owner;      /* processes owning x3 resp. y3    */
    int owner;                        /* process currently broadcasting  */
    int lo, hi;                       /* owner's part of x3:y3           */
    int planes;                       /* number of indices in lo:hi      */
    int chunk_bytes;                  /* length of the owner's piece     */
    unsigned char *dest = ra;         /* where the next piece is stored  */

#ifdef DEBUG
    printf ("Process %d calls replicate3 %d : %d, %d : %d, %d : %d\n",
             self, *x1, *y1, *x2, *y2, *x3, *y3);
#endif

    /* first_owner : last_owner = processes holding elements of x3:y3 */

    first_owner = dalib_where (*N3, *x3);
    last_owner  = dalib_where (*N3, *y3);

    owner = first_owner;

    while (owner <= last_owner)
      {
        /* local range of process 'owner', clipped to the section */

        dalib_local_extensions (owner, *N3, &lo, &hi);

        if (*x3 > lo) lo = *x3;
        if (*y3 < hi) hi = *y3;

        /* a(x1:y1,x2:y2,lo:hi) is the piece coming from process 'owner' */

        planes      = hi - lo + 1;
        chunk_bytes = planes * (*y1 - *x1 + 1) * (*y2 - *x2 + 1) * *size;

        /* only the owner itself fills its piece of ra from a */

        if (self == owner)
          {
            dalib_setup_section3 (*size, *N1, *x1, *y1, *N2, *x2, *y2,
                                  *N3, lo, hi);
            dalib_copy_section3  (dest, a);
          }

        /* the piece is broadcast with 'owner' as the source process */

        process_broadcast (dest, chunk_bytes, owner);

        dest  += chunk_bytes;
        owner += 1;
      }

} /* dalib_replicate3 */

/*
 * dalib_replicate4__ : replicate a(x1:y1,x2:y2,x3:y3,x4:y4) into ra
 *
 *   ra       destination buffer; the pieces contributed by the individual
 *            owner processes are stored back to back, starting at ra
 *   a        source array handed to dalib_copy_section4 on the owning
 *            process
 *   size     element size (presumably in bytes, as it scales the byte
 *            pointer into ra -- confirm against callers)
 *   N1,N2,N3 passed on to dalib_setup_section4 together with the bounds
 *            of the first three dimensions
 *   N4       passed on to dalib_where / dalib_local_extensions; the owner
 *            processes are determined from the fourth dimension (x4:y4)
 *   xk,yk    bounds of the section in dimension k
 *
 * All arguments are pointers because this is the FORTRAN interface.
 */
void dalib_replicate4__ (ra, a, size, N1, x1, y1, N2, x2, y2,
                                     N3, x3, y3, N4, x4, y4)
unsigned char *ra, *a;
int *size;
int *N1, *x1, *y1, *N2, *x2, *y2, *N3, *x3, *y3, *N4, *x4, *y4;

{
    int self = pcb.i;                 /* id of the calling process       */
    int first_owner, last_owner;      /* processes owning x4 resp. y4    */
    int owner;                        /* process currently broadcasting  */
    int lo, hi;                       /* owner's part of x4:y4           */
    int slabs;                        /* number of indices in lo:hi      */
    int chunk_bytes;                  /* length of the owner's piece     */
    unsigned char *dest = ra;         /* where the next piece is stored  */

#ifdef DEBUG
    printf ("Process %d calls replicate4 %d : %d, %d : %d, %d : %d, %d : %d\n",
             self, *x1, *y1, *x2, *y2, *x3, *y3, *x4, *y4);
#endif

    /* first_owner : last_owner = processes holding elements of x4:y4 */

    first_owner = dalib_where (*N4, *x4);
    last_owner  = dalib_where (*N4, *y4);

    owner = first_owner;

    while (owner <= last_owner)
      {
        /* local range of process 'owner', clipped to the section */

        dalib_local_extensions (owner, *N4, &lo, &hi);

        if (*x4 > lo) lo = *x4;
        if (*y4 < hi) hi = *y4;

        /* a(x1:y1,x2:y2,x3:y3,lo:hi) is the piece of process 'owner' */

        slabs       = hi - lo + 1;
        chunk_bytes = slabs * (*y1 - *x1 + 1) *
                      (*y2 - *x2 + 1) * (*y3 - *x3 + 1) * *size;

        /* only the owner itself fills its piece of ra from a */

        if (self == owner)
          {
            dalib_setup_section4 (*size, *N1, *x1, *y1, *N2, *x2, *y2,
                                  *N3, *x3, *y3, *N4, lo, hi);
            dalib_copy_section4  (dest, a);
          }

        /* the piece is broadcast with 'owner' as the source process */

        process_broadcast (dest, chunk_bytes, owner);

        dest  += chunk_bytes;
        owner += 1;
      }

} /* dalib_replicate4 */

/*    Idea for replication when most processors own elements

C     ****************************************************************
C     *                                                              *
C     *  REPLICATE a full array NAME (only nodes)                    *
C     *                                                              *
C     ****************************************************************

      subroutine adp_full_replicate_NAME (NAME_dsp, NAME, ra)
      integer NAME_dsp
      TYPE NAME(*), ra(*)
      integer tsize
      parameter (tsize = TYPESIZE)
c
c        A of $1    A of $2        ....        A of $p
c      -------------------------------------------------
c      |         |          |     ......     |         |
c      |         |          |     ......     |         |
c      |         |          |     ......     |         |
c      -------------------------------------------------
c
c         every part is shifted to the right neighbour processor, p-1 times
c
      integer p, sendp, recvp, pid
      integer lb1, ub1, lb2, ub2, glb2, gub2
      integer k, size, address, rows
      integer dalib_nproc, dalib_pid 
      integer i
c
      p = dalib_nproc ()
      pid = dalib_pid()
      sendp = pid + 1
      if (sendp .gt. p) sendp = 1
      recvp = pid - 1
      if (recvp .eq. 0) recvp = p
c
      call dalib_array_dimensions2 (NAME_dsp, lb1, ub1, lb2, ub2)
c
      call dalib_pardim_dimensions (NAME_dsp, glb2, gub2)
c
c     address of ra (1, lb2) where ra (lb1:ub1,glb2:ub2)
      rows    = ub1 - lb1 + 1
      address = (lb2 - glb2) * rows + 1
      k = dalib_pid()
      size = (ub2 - lb2 + 1) * rows
c     
c     copy local part
      do i = 1, size
         ra (address + i - 1) = NAME (i)
      end do
      do i=1,p-1
c        send right processor the last part
         call dalib_array_size  (NAME_dsp, k, size)
         call dalib_send (sendp, ra (address), size)
         k = k - 1
         if (k .eq. 0) then
            k = p
            address = (gub2 - glb2 + 1) * rows + 1
         end if
c        get part of original processor k
         call dalib_array_size (NAME_dsp,k,size)
         address = address - (size / tsize)
         call dalib_receive (recvp, ra(address),size)
      end do
      end

*/
