SLP: support entire BB.

Message ID b1a866a7-502e-49e9-e8f8-a0fe3d99feae@suse.cz
State New
Headers show
Series
  • SLP: support entire BB.
Related show

Commit Message

Martin Liška July 31, 2020, 10:30 a.m.
Hey.

Motivation of the patch is to allow vectorization of an entire BB.
So far we bail out a sub-BB region when we reach a stmt for which
vect_find_stmt_data_reference returns false. That's replaced with
recoding of groups of the data references.

We can newly vectorize code like:

void foo();
void bar (int i, double *a, double *b)
{
   double tem1 = a[2*i];
   double tem2 = 2*a[2*i+1];
   foo ();
   b[2*i] = 2*tem1;
   b[2*i+1] = tem2;
}

into:
   <bb 2> [local count: 1073741824]:
   _1 = i_12(D) * 2;
   _2 = (long unsigned int) _1;
   _3 = _2 * 8;
   _4 = a_13(D) + _3;
   vect_tem1_15.5_24 = MEM <vector(2) double> [(double *)_4];
   vect__10.6_25 = vect_tem1_15.5_24 * { 2.0e+0, 2.0e+0 };
   foo ();
   _9 = b_18(D) + _3;
   MEM <vector(2) double> [(double *)_9] = vect__10.6_25;
   return;

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Thoughs?
Martin

gcc/ChangeLog:

	* tree-vect-data-refs.c (dr_group_sort_cmp): Work on
	data_ref_pair.
	(vect_analyze_data_ref_accesses): Work on groups.
	(vect_find_stmt_data_reference): Add group_id argument and fill
	up dataref_groups vector.
	* tree-vect-loop.c (vect_get_datarefs_in_loop): Pass new
	arguments.
	(vect_analyze_loop_2): Likewise.
	* tree-vect-slp.c (vect_slp_analyze_bb_1): Pass argument.
	(vect_slp_bb_region): Likewise.
	(vect_slp_region): Likewise.
	(vect_slp_bb):Work on the entire BB.
	* tree-vectorizer.h (vect_analyze_data_ref_accesses): Add new
	argument.
	(vect_find_stmt_data_reference): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/bb-slp-38.c: Adjust pattern as now we only process
	a single vectorization and now 2 partial.
	* gcc.dg/vect/bb-slp-45.c: New test.
---
  gcc/testsuite/gcc.dg/vect/bb-slp-38.c |  2 +-
  gcc/testsuite/gcc.dg/vect/bb-slp-45.c | 36 ++++++++++++
  gcc/tree-vect-data-refs.c             | 63 +++++++++++++-------
  gcc/tree-vect-loop.c                  |  5 +-
  gcc/tree-vect-slp.c                   | 82 ++++++++++-----------------
  gcc/tree-vectorizer.h                 |  5 +-
  6 files changed, 118 insertions(+), 75 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-45.c

-- 
2.27.0

Comments

Marek Polacek via Gcc-patches Aug. 3, 2020, 10:29 a.m. | #1
On Fri, Jul 31, 2020 at 12:30 PM Martin Liška <mliska@suse.cz> wrote:
>

> Hey.

>

> Motivation of the patch is to allow vectorization of an entire BB.

> So far we bail out a sub-BB region when we reach a stmt for which

> vect_find_stmt_data_reference returns false. That's replaced with

> recoding of groups of the data references.

>

> We can newly vectorize code like:

>

> void foo();

> void bar (int i, double *a, double *b)

> {

>    double tem1 = a[2*i];

>    double tem2 = 2*a[2*i+1];

>    foo ();

>    b[2*i] = 2*tem1;

>    b[2*i+1] = tem2;

> }

>

> into:

>    <bb 2> [local count: 1073741824]:

>    _1 = i_12(D) * 2;

>    _2 = (long unsigned int) _1;

>    _3 = _2 * 8;

>    _4 = a_13(D) + _3;

>    vect_tem1_15.5_24 = MEM <vector(2) double> [(double *)_4];

>    vect__10.6_25 = vect_tem1_15.5_24 * { 2.0e+0, 2.0e+0 };

>    foo ();

>    _9 = b_18(D) + _3;

>    MEM <vector(2) double> [(double *)_9] = vect__10.6_25;

>    return;

>

> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.


OK with some minor changes, see below

> Thoughs?

> Martin

>

> gcc/ChangeLog:

>

>         * tree-vect-data-refs.c (dr_group_sort_cmp): Work on

>         data_ref_pair.

>         (vect_analyze_data_ref_accesses): Work on groups.

>         (vect_find_stmt_data_reference): Add group_id argument and fill

>         up dataref_groups vector.

>         * tree-vect-loop.c (vect_get_datarefs_in_loop): Pass new

>         arguments.

>         (vect_analyze_loop_2): Likewise.

>         * tree-vect-slp.c (vect_slp_analyze_bb_1): Pass argument.

>         (vect_slp_bb_region): Likewise.

>         (vect_slp_region): Likewise.

>         (vect_slp_bb):Work on the entire BB.

>         * tree-vectorizer.h (vect_analyze_data_ref_accesses): Add new

>         argument.

>         (vect_find_stmt_data_reference): Likewise.

>

> gcc/testsuite/ChangeLog:

>

>         * gcc.dg/vect/bb-slp-38.c: Adjust pattern as now we only process

>         a single vectorization and now 2 partial.

>         * gcc.dg/vect/bb-slp-45.c: New test.

> ---

>   gcc/testsuite/gcc.dg/vect/bb-slp-38.c |  2 +-

>   gcc/testsuite/gcc.dg/vect/bb-slp-45.c | 36 ++++++++++++

>   gcc/tree-vect-data-refs.c             | 63 +++++++++++++-------

>   gcc/tree-vect-loop.c                  |  5 +-

>   gcc/tree-vect-slp.c                   | 82 ++++++++++-----------------

>   gcc/tree-vectorizer.h                 |  5 +-

>   6 files changed, 118 insertions(+), 75 deletions(-)

>   create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-45.c

>

> diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c

> index 59aec54fffd..9c57ea3c2c1 100644

> --- a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c

> +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c

> @@ -41,4 +41,4 @@ int main()

>   }

>

>   /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm } } } */

> -/* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" { target vect_perm } } } */

> +/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" { target vect_perm } } } */

> diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-45.c b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c

> new file mode 100644

> index 00000000000..4107a34cb93

> --- /dev/null

> +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c

> @@ -0,0 +1,36 @@

> +/* { dg-require-effective-target vect_double } */

> +

> +#include "tree-vect.h"

> +

> +extern void abort (void);

> +

> +double a[8], b[8];

> +int x;

> +

> +void __attribute__((noinline,noclone))

> +bar (void)

> +{

> +  x = 1;

> +}

> +

> +void __attribute__((noinline,noclone))

> +foo(int i)

> +{

> +  double tem1 = a[2*i];

> +  double tem2 = 2*a[2*i+1];

> +  bar ();

> +  b[2*i] = 2*tem1;

> +  b[2*i+1] = tem2;

> +}

> +

> +int main()

> +{

> +  int i;

> +  check_vect ();

> +  for (i = 0; i < 8; ++i)

> +    b[i] = i;

> +  foo (2);

> +  return 0;

> +}

> +

> +/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */

> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c

> index a78ae61d1b0..36e10ff3619 100644

> --- a/gcc/tree-vect-data-refs.c

> +++ b/gcc/tree-vect-data-refs.c

> @@ -2757,14 +2757,18 @@ vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)

>     return vect_analyze_group_access (vinfo, dr_info);

>   }

>

> +typedef std::pair<data_reference_p, int> data_ref_pair;

> +

>   /* Compare two data-references DRA and DRB to group them into chunks

>      suitable for grouping.  */

>

>   static int

>   dr_group_sort_cmp (const void *dra_, const void *drb_)

>   {

> -  data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);

> -  data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);

> +  data_ref_pair dra_pair = *(data_ref_pair *)const_cast<void *>(dra_);

> +  data_ref_pair drb_pair = *(data_ref_pair *)const_cast<void *>(drb_);

> +  data_reference_p dra = dra_pair.first;

> +  data_reference_p drb = drb_pair.first;

>     int cmp;

>

>     /* Stabilize sort.  */

> @@ -2772,10 +2776,13 @@ dr_group_sort_cmp (const void *dra_, const void *drb_)

>       return 0;

>

>     /* DRs in different loops never belong to the same group.  */


Comment needs to be adjusted to mention basic-block instead of loop

> -  loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;

> -  loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;

> -  if (loopa != loopb)

> -    return loopa->num < loopb->num ? -1 : 1;

> +  int bb_index1 = gimple_bb (DR_STMT (dra))->index;

> +  int bb_index2 = gimple_bb (DR_STMT (drb))->index;

> +  if (bb_index1 != bb_index2)

> +    return bb_index1 < bb_index2 ? -1 : 1;

> +

> +  if (dra_pair.second != drb_pair.second)

> +    return dra_pair.second < drb_pair.second ? -1 : 1;


Please add a comment that this compares the DR group
(not obvious because it says .second)

>

>     /* Ordering of DRs according to base.  */

>     cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),

> @@ -2881,11 +2888,11 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,

>      FORNOW: handle only arrays and pointer accesses.  */

>

>   opt_result

> -vect_analyze_data_ref_accesses (vec_info *vinfo)

> +vect_analyze_data_ref_accesses (vec_info *vinfo,

> +                               vec<int> *dataref_groups)

>   {

>     unsigned int i;

>     vec<data_reference_p> datarefs = vinfo->shared->datarefs;

> -  struct data_reference *dr;

>

>     DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");

>

> @@ -2895,14 +2902,21 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)

>     /* Sort the array of datarefs to make building the interleaving chains

>        linear.  Don't modify the original vector's order, it is needed for

>        determining what dependencies are reversed.  */

> -  vec<data_reference_p> datarefs_copy = datarefs.copy ();

> +  vec<data_ref_pair> datarefs_copy;

> +  datarefs_copy.create (datarefs.length ());

> +  for (unsigned i = 0; i < datarefs.length (); i++)

> +    {

> +      int group_id = dataref_groups ? (*dataref_groups)[i] : 0;

> +      datarefs_copy.safe_push (data_ref_pair (datarefs[i], group_id));


quick_push

> +    }

>     datarefs_copy.qsort (dr_group_sort_cmp);

>     hash_set<stmt_vec_info> to_fixup;

>

>     /* Build the interleaving chains.  */

>     for (i = 0; i < datarefs_copy.length () - 1;)

>       {

> -      data_reference_p dra = datarefs_copy[i];

> +      data_reference_p dra = datarefs_copy[i].first;

> +      int dra_group_id = datarefs_copy[i].second;

>         dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);

>         stmt_vec_info stmtinfo_a = dr_info_a->stmt;

>         stmt_vec_info lastinfo = NULL;

> @@ -2914,7 +2928,8 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)

>         }

>         for (i = i + 1; i < datarefs_copy.length (); ++i)

>         {

> -         data_reference_p drb = datarefs_copy[i];

> +         data_reference_p drb = datarefs_copy[i].first;

> +         int drb_group_id = datarefs_copy[i].second;

>           dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);

>           stmt_vec_info stmtinfo_b = dr_info_b->stmt;

>           if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)

> @@ -2927,10 +2942,14 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)

>              matters we can push those to a worklist and re-iterate

>              over them.  The we can just skip ahead to the next DR here.  */

>

> -         /* DRs in a different loop should not be put into the same

> +         /* DRs in a different BBs should not be put into the same

>              interleaving group.  */

> -         if (gimple_bb (DR_STMT (dra))->loop_father

> -             != gimple_bb (DR_STMT (drb))->loop_father)

> +         int bb_index1 = gimple_bb (DR_STMT (dra))->index;

> +         int bb_index2 = gimple_bb (DR_STMT (drb))->index;

> +         if (bb_index1 != bb_index2)

> +           break;

> +

> +         if (dra_group_id != drb_group_id)

>             break;

>

>           /* Check that the data-refs have same first location (except init)

> @@ -2977,7 +2996,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)

>           HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));

>           HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));

>           HOST_WIDE_INT init_prev

> -           = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));

> +           = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1].first));

>           gcc_assert (init_a <= init_b

>                       && init_a <= init_prev

>                       && init_prev <= init_b);

> @@ -2985,7 +3004,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)

>           /* Do not place the same access in the interleaving chain twice.  */

>           if (init_b == init_prev)

>             {

> -             gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))

> +             gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1].first))

>                           < gimple_uid (DR_STMT (drb)));

>               /* Simply link in duplicates and fix up the chain below.  */

>             }

> @@ -3098,9 +3117,10 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)

>         to_fixup.add (newgroup);

>       }

>

> -  FOR_EACH_VEC_ELT (datarefs_copy, i, dr)

> +  data_ref_pair *dr_pair;

> +  FOR_EACH_VEC_ELT (datarefs_copy, i, dr_pair)

>       {

> -      dr_vec_info *dr_info = vinfo->lookup_dr (dr);

> +      dr_vec_info *dr_info = vinfo->lookup_dr (dr_pair->first);

>         if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)

>           && !vect_analyze_data_ref_access (vinfo, dr_info))

>         {

> @@ -3991,7 +4011,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,

>

>   opt_result

>   vect_find_stmt_data_reference (loop_p loop, gimple *stmt,

> -                              vec<data_reference_p> *datarefs)

> +                              vec<data_reference_p> *datarefs,

> +                              vec<int> *dataref_groups, int group_id)


You are always passing NULL here so simply avoid this and the following changes.

OK with those changes.
Thanks,
Richard.

>   {

>     /* We can ignore clobbers for dataref analysis - they are removed during

>        loop vectorization and BB vectorization checks dependences with a

> @@ -4118,6 +4139,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,

>                       newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));

>                       free_data_ref (dr);

>                       datarefs->safe_push (newdr);

> +                     if (dataref_groups)

> +                       dataref_groups->safe_push (group_id);

>                       return opt_result::success ();

>                     }

>                 }

> @@ -4127,6 +4150,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,

>       }

>

>     datarefs->safe_push (dr);

> +  if (dataref_groups)

> +    dataref_groups->safe_push (group_id);

>     return opt_result::success ();

>   }

>

> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c

> index 185019c3305..2326d670bee 100644

> --- a/gcc/tree-vect-loop.c

> +++ b/gcc/tree-vect-loop.c

> @@ -1865,7 +1865,8 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,

>         if (is_gimple_debug (stmt))

>           continue;

>         ++(*n_stmts);

> -       opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs);

> +       opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs,

> +                                                       NULL, 0);

>         if (!res)

>           {

>             if (is_gimple_call (stmt) && loop->safelen)

> @@ -2087,7 +2088,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)

>     /* Analyze the access patterns of the data-refs in the loop (consecutive,

>        complex, etc.). FORNOW: Only handle consecutive access pattern.  */

>

> -  ok = vect_analyze_data_ref_accesses (loop_vinfo);

> +  ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);

>     if (!ok)

>       {

>         if (dump_enabled_p ())

> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c

> index 72192b5a813..167db076454 100644

> --- a/gcc/tree-vect-slp.c

> +++ b/gcc/tree-vect-slp.c

> @@ -3217,7 +3217,8 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo)

>      region.  */

>

>   static bool

> -vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)

> +vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,

> +                      vec<int> *dataref_groups)

>   {

>     DUMP_VECT_SCOPE ("vect_slp_analyze_bb");

>

> @@ -3239,7 +3240,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)

>         return false;

>       }

>

> -  if (!vect_analyze_data_ref_accesses (bb_vinfo))

> +  if (!vect_analyze_data_ref_accesses (bb_vinfo, dataref_groups))

>       {

>        if (dump_enabled_p ())

>          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

> @@ -3348,10 +3349,11 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)

>      given by DATAREFS.  */

>

>   static bool

> -vect_slp_bb_region (gimple_stmt_iterator region_begin,

> -                   gimple_stmt_iterator region_end,

> -                   vec<data_reference_p> datarefs,

> -                   unsigned int n_stmts)

> +vect_slp_region (gimple_stmt_iterator region_begin,

> +                gimple_stmt_iterator region_end,

> +                vec<data_reference_p> datarefs,

> +                vec<int> *dataref_groups,

> +                unsigned int n_stmts)

>   {

>     bb_vec_info bb_vinfo;

>     auto_vector_modes vector_modes;

> @@ -3378,7 +3380,7 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin,

>         bb_vinfo->shared->check_datarefs ();

>         bb_vinfo->vector_mode = next_vector_mode;

>

> -      if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)

> +      if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, dataref_groups)

>           && dbg_cnt (vect_slp))

>         {

>           if (dump_enabled_p ())

> @@ -3473,45 +3475,30 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin,

>   bool

>   vect_slp_bb (basic_block bb)

>   {

> -  gimple_stmt_iterator gsi;

> -  bool any_vectorized = false;

> -

> -  gsi = gsi_after_labels (bb);

> -  while (!gsi_end_p (gsi))

> +  vec<data_reference_p> datarefs = vNULL;

> +  vec<int> dataref_groups = vNULL;

> +  int insns = 0;

> +  int current_group = 0;

> +  gimple_stmt_iterator region_begin = gsi_start_nondebug_after_labels_bb (bb);

> +  gimple_stmt_iterator region_end = gsi_last_bb (bb);

> +  if (!gsi_end_p (region_end))

> +    gsi_next (&region_end);

> +

> +  for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);

> +       gsi_next (&gsi))

>       {

> -      gimple_stmt_iterator region_begin = gsi;

> -      vec<data_reference_p> datarefs = vNULL;

> -      int insns = 0;

> -

> -      for (; !gsi_end_p (gsi); gsi_next (&gsi))

> -       {

> -         gimple *stmt = gsi_stmt (gsi);

> -         if (is_gimple_debug (stmt))

> -           {

> -             /* Skip leading debug stmts.  */

> -             if (gsi_stmt (region_begin) == stmt)

> -               gsi_next (&region_begin);

> -             continue;

> -           }

> -         insns++;

> -

> -         if (gimple_location (stmt) != UNKNOWN_LOCATION)

> -           vect_location = stmt;

> +      gimple *stmt = gsi_stmt (gsi);

> +      if (is_gimple_debug (stmt))

> +       continue;

>

> -         if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))

> -           break;

> -       }

> -      if (gsi_end_p (region_begin))

> -       break;

> +      insns++;

>

> -      /* Skip leading unhandled stmts.  */

> -      if (gsi_stmt (region_begin) == gsi_stmt (gsi))

> -       {

> -         gsi_next (&gsi);

> -         continue;

> -       }

> +      if (gimple_location (stmt) != UNKNOWN_LOCATION)

> +       vect_location = stmt;

>

> -      gimple_stmt_iterator region_end = gsi;

> +      if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs,

> +                                         &dataref_groups, current_group))

> +       ++current_group;

>

>         if (insns > param_slp_max_insns_in_bb)

>         {

> @@ -3520,17 +3507,10 @@ vect_slp_bb (basic_block bb)

>                              "not vectorized: too many instructions in "

>                              "basic block.\n");

>         }

> -      else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))

> -       any_vectorized = true;

> -

> -      if (gsi_end_p (region_end))

> -       break;

> -

> -      /* Skip the unhandled stmt.  */

> -      gsi_next (&gsi);

>       }

>

> -  return any_vectorized;

> +  return vect_slp_region (region_begin, region_end, datarefs,

> +                         &dataref_groups, insns);

>   }

>

>

> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h

> index 5466c78c20b..21881a9390c 100644

> --- a/gcc/tree-vectorizer.h

> +++ b/gcc/tree-vectorizer.h

> @@ -1917,14 +1917,15 @@ extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);

>   extern opt_result vect_enhance_data_refs_alignment (loop_vec_info);

>   extern opt_result vect_analyze_data_refs_alignment (loop_vec_info);

>   extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);

> -extern opt_result vect_analyze_data_ref_accesses (vec_info *);

> +extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);

>   extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);

>   extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,

>                                       tree, int, internal_fn *, tree *);

>   extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,

>                                        gather_scatter_info *);

>   extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,

> -                                                vec<data_reference_p> *);

> +                                                vec<data_reference_p> *,

> +                                                vec<int> *, int);

>   extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *);

>   extern void vect_record_base_alignments (vec_info *);

>   extern tree vect_create_data_ref_ptr (vec_info *,

> --

> 2.27.0

>
Martin Liška Aug. 10, 2020, 10:29 a.m. | #2
On 8/3/20 12:29 PM, Richard Biener wrote:
> You are always passing NULL here so simply avoid this and the following changes.


Are you sure about this?

Note that vect_slp_bb does:

+      if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs,
+                                         &dataref_groups, current_group))
+       ++current_group;

Martin
Marek Polacek via Gcc-patches Aug. 24, 2020, 6:55 a.m. | #3
On Mon, Aug 10, 2020 at 12:29 PM Martin Liška <mliska@suse.cz> wrote:
>

> On 8/3/20 12:29 PM, Richard Biener wrote:

> > You are always passing NULL here so simply avoid this and the following changes.

>

> Are you sure about this?

>

> Note that vect_slp_bb does:

>

> +      if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs,

> +                                         &dataref_groups, current_group))

> +       ++current_group;


Oops, I stand corrected.  Patch is OK without this particular requested change.

Richard.

> Martin

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c
index 59aec54fffd..9c57ea3c2c1 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c
@@ -41,4 +41,4 @@  int main()
  }
  
  /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" { target vect_perm } } } */
+/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" { target vect_perm } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-45.c b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c
new file mode 100644
index 00000000000..4107a34cb93
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c
@@ -0,0 +1,36 @@ 
+/* { dg-require-effective-target vect_double } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+double a[8], b[8];
+int x;
+
+void __attribute__((noinline,noclone))
+bar (void)
+{
+  x = 1;
+}
+
+void __attribute__((noinline,noclone))
+foo(int i)
+{
+  double tem1 = a[2*i];
+  double tem2 = 2*a[2*i+1];
+  bar ();
+  b[2*i] = 2*tem1;
+  b[2*i+1] = tem2;
+}
+
+int main()
+{
+  int i;
+  check_vect ();
+  for (i = 0; i < 8; ++i)
+    b[i] = i;
+  foo (2);
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index a78ae61d1b0..36e10ff3619 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2757,14 +2757,18 @@  vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
    return vect_analyze_group_access (vinfo, dr_info);
  }
  
+typedef std::pair<data_reference_p, int> data_ref_pair;
+
  /* Compare two data-references DRA and DRB to group them into chunks
     suitable for grouping.  */
  
  static int
  dr_group_sort_cmp (const void *dra_, const void *drb_)
  {
-  data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
-  data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
+  data_ref_pair dra_pair = *(data_ref_pair *)const_cast<void *>(dra_);
+  data_ref_pair drb_pair = *(data_ref_pair *)const_cast<void *>(drb_);
+  data_reference_p dra = dra_pair.first;
+  data_reference_p drb = drb_pair.first;
    int cmp;
  
    /* Stabilize sort.  */
@@ -2772,10 +2776,13 @@  dr_group_sort_cmp (const void *dra_, const void *drb_)
      return 0;
  
    /* DRs in different loops never belong to the same group.  */
-  loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
-  loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
-  if (loopa != loopb)
-    return loopa->num < loopb->num ? -1 : 1;
+  int bb_index1 = gimple_bb (DR_STMT (dra))->index;
+  int bb_index2 = gimple_bb (DR_STMT (drb))->index;
+  if (bb_index1 != bb_index2)
+    return bb_index1 < bb_index2 ? -1 : 1;
+
+  if (dra_pair.second != drb_pair.second)
+    return dra_pair.second < drb_pair.second ? -1 : 1;
  
    /* Ordering of DRs according to base.  */
    cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
@@ -2881,11 +2888,11 @@  can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
     FORNOW: handle only arrays and pointer accesses.  */
  
  opt_result
-vect_analyze_data_ref_accesses (vec_info *vinfo)
+vect_analyze_data_ref_accesses (vec_info *vinfo,
+				vec<int> *dataref_groups)
  {
    unsigned int i;
    vec<data_reference_p> datarefs = vinfo->shared->datarefs;
-  struct data_reference *dr;
  
    DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");
  
@@ -2895,14 +2902,21 @@  vect_analyze_data_ref_accesses (vec_info *vinfo)
    /* Sort the array of datarefs to make building the interleaving chains
       linear.  Don't modify the original vector's order, it is needed for
       determining what dependencies are reversed.  */
-  vec<data_reference_p> datarefs_copy = datarefs.copy ();
+  vec<data_ref_pair> datarefs_copy;
+  datarefs_copy.create (datarefs.length ());
+  for (unsigned i = 0; i < datarefs.length (); i++)
+    {
+      int group_id = dataref_groups ? (*dataref_groups)[i] : 0;
+      datarefs_copy.safe_push (data_ref_pair (datarefs[i], group_id));
+    }
    datarefs_copy.qsort (dr_group_sort_cmp);
    hash_set<stmt_vec_info> to_fixup;
  
    /* Build the interleaving chains.  */
    for (i = 0; i < datarefs_copy.length () - 1;)
      {
-      data_reference_p dra = datarefs_copy[i];
+      data_reference_p dra = datarefs_copy[i].first;
+      int dra_group_id = datarefs_copy[i].second;
        dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
        stmt_vec_info stmtinfo_a = dr_info_a->stmt;
        stmt_vec_info lastinfo = NULL;
@@ -2914,7 +2928,8 @@  vect_analyze_data_ref_accesses (vec_info *vinfo)
  	}
        for (i = i + 1; i < datarefs_copy.length (); ++i)
  	{
-	  data_reference_p drb = datarefs_copy[i];
+	  data_reference_p drb = datarefs_copy[i].first;
+	  int drb_group_id = datarefs_copy[i].second;
  	  dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
  	  stmt_vec_info stmtinfo_b = dr_info_b->stmt;
  	  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
@@ -2927,10 +2942,14 @@  vect_analyze_data_ref_accesses (vec_info *vinfo)
  	     matters we can push those to a worklist and re-iterate
  	     over them.  The we can just skip ahead to the next DR here.  */
  
-	  /* DRs in a different loop should not be put into the same
+	  /* DRs in a different BBs should not be put into the same
  	     interleaving group.  */
-	  if (gimple_bb (DR_STMT (dra))->loop_father
-	      != gimple_bb (DR_STMT (drb))->loop_father)
+	  int bb_index1 = gimple_bb (DR_STMT (dra))->index;
+	  int bb_index2 = gimple_bb (DR_STMT (drb))->index;
+	  if (bb_index1 != bb_index2)
+	    break;
+
+	  if (dra_group_id != drb_group_id)
  	    break;
  
  	  /* Check that the data-refs have same first location (except init)
@@ -2977,7 +2996,7 @@  vect_analyze_data_ref_accesses (vec_info *vinfo)
  	  HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  	  HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  	  HOST_WIDE_INT init_prev
-	    = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
+	    = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1].first));
  	  gcc_assert (init_a <= init_b
  		      && init_a <= init_prev
  		      && init_prev <= init_b);
@@ -2985,7 +3004,7 @@  vect_analyze_data_ref_accesses (vec_info *vinfo)
  	  /* Do not place the same access in the interleaving chain twice.  */
  	  if (init_b == init_prev)
  	    {
-	      gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
+	      gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1].first))
  			  < gimple_uid (DR_STMT (drb)));
  	      /* Simply link in duplicates and fix up the chain below.  */
  	    }
@@ -3098,9 +3117,10 @@  vect_analyze_data_ref_accesses (vec_info *vinfo)
        to_fixup.add (newgroup);
      }
  
-  FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
+  data_ref_pair *dr_pair;
+  FOR_EACH_VEC_ELT (datarefs_copy, i, dr_pair)
      {
-      dr_vec_info *dr_info = vinfo->lookup_dr (dr);
+      dr_vec_info *dr_info = vinfo->lookup_dr (dr_pair->first);
        if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
  	  && !vect_analyze_data_ref_access (vinfo, dr_info))
  	{
@@ -3991,7 +4011,8 @@  vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
  
  opt_result
  vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
-			       vec<data_reference_p> *datarefs)
+			       vec<data_reference_p> *datarefs,
+			       vec<int> *dataref_groups, int group_id)
  {
    /* We can ignore clobbers for dataref analysis - they are removed during
       loop vectorization and BB vectorization checks dependences with a
@@ -4118,6 +4139,8 @@  vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
  		      newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
  		      free_data_ref (dr);
  		      datarefs->safe_push (newdr);
+		      if (dataref_groups)
+			dataref_groups->safe_push (group_id);
  		      return opt_result::success ();
  		    }
  		}
@@ -4127,6 +4150,8 @@  vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
      }
  
    datarefs->safe_push (dr);
+  if (dataref_groups)
+    dataref_groups->safe_push (group_id);
    return opt_result::success ();
  }
  
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 185019c3305..2326d670bee 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1865,7 +1865,8 @@  vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
  	if (is_gimple_debug (stmt))
  	  continue;
  	++(*n_stmts);
-	opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs);
+	opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs,
+							NULL, 0);
  	if (!res)
  	  {
  	    if (is_gimple_call (stmt) && loop->safelen)
@@ -2087,7 +2088,7 @@  vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
    /* Analyze the access patterns of the data-refs in the loop (consecutive,
       complex, etc.). FORNOW: Only handle consecutive access pattern.  */
  
-  ok = vect_analyze_data_ref_accesses (loop_vinfo);
+  ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
    if (!ok)
      {
        if (dump_enabled_p ())
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 72192b5a813..167db076454 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3217,7 +3217,8 @@  vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
     region.  */
  
  static bool
-vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
+vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
+		       vec<int> *dataref_groups)
  {
    DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
  
@@ -3239,7 +3240,7 @@  vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
        return false;
      }
  
-  if (!vect_analyze_data_ref_accesses (bb_vinfo))
+  if (!vect_analyze_data_ref_accesses (bb_vinfo, dataref_groups))
      {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3348,10 +3349,11 @@  vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
     given by DATAREFS.  */
  
  static bool
-vect_slp_bb_region (gimple_stmt_iterator region_begin,
-		    gimple_stmt_iterator region_end,
-		    vec<data_reference_p> datarefs,
-		    unsigned int n_stmts)
+vect_slp_region (gimple_stmt_iterator region_begin,
+		 gimple_stmt_iterator region_end,
+		 vec<data_reference_p> datarefs,
+		 vec<int> *dataref_groups,
+		 unsigned int n_stmts)
  {
    bb_vec_info bb_vinfo;
    auto_vector_modes vector_modes;
@@ -3378,7 +3380,7 @@  vect_slp_bb_region (gimple_stmt_iterator region_begin,
  	bb_vinfo->shared->check_datarefs ();
        bb_vinfo->vector_mode = next_vector_mode;
  
-      if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
+      if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, dataref_groups)
  	  && dbg_cnt (vect_slp))
  	{
  	  if (dump_enabled_p ())
@@ -3473,45 +3475,30 @@  vect_slp_bb_region (gimple_stmt_iterator region_begin,
  bool
  vect_slp_bb (basic_block bb)
  {
-  gimple_stmt_iterator gsi;
-  bool any_vectorized = false;
-
-  gsi = gsi_after_labels (bb);
-  while (!gsi_end_p (gsi))
+  vec<data_reference_p> datarefs = vNULL;
+  vec<int> dataref_groups = vNULL;
+  int insns = 0;
+  int current_group = 0;
+  gimple_stmt_iterator region_begin = gsi_start_nondebug_after_labels_bb (bb);
+  gimple_stmt_iterator region_end = gsi_last_bb (bb);
+  if (!gsi_end_p (region_end))
+    gsi_next (&region_end);
+
+  for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
+       gsi_next (&gsi))
      {
-      gimple_stmt_iterator region_begin = gsi;
-      vec<data_reference_p> datarefs = vNULL;
-      int insns = 0;
-
-      for (; !gsi_end_p (gsi); gsi_next (&gsi))
-	{
-	  gimple *stmt = gsi_stmt (gsi);
-	  if (is_gimple_debug (stmt))
-	    {
-	      /* Skip leading debug stmts.  */
-	      if (gsi_stmt (region_begin) == stmt)
-		gsi_next (&region_begin);
-	      continue;
-	    }
-	  insns++;
-
-	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
-	    vect_location = stmt;
+      gimple *stmt = gsi_stmt (gsi);
+      if (is_gimple_debug (stmt))
+	continue;
  
-	  if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
-	    break;
-	}
-      if (gsi_end_p (region_begin))
-	break;
+      insns++;
  
-      /* Skip leading unhandled stmts.  */
-      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
-	{
-	  gsi_next (&gsi);
-	  continue;
-	}
+      if (gimple_location (stmt) != UNKNOWN_LOCATION)
+	vect_location = stmt;
  
-      gimple_stmt_iterator region_end = gsi;
+      if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs,
+					  &dataref_groups, current_group))
+	++current_group;
  
        if (insns > param_slp_max_insns_in_bb)
  	{
@@ -3520,17 +3507,10 @@  vect_slp_bb (basic_block bb)
  			     "not vectorized: too many instructions in "
  			     "basic block.\n");
  	}
-      else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
-	any_vectorized = true;
-
-      if (gsi_end_p (region_end))
-	break;
-
-      /* Skip the unhandled stmt.  */
-      gsi_next (&gsi);
      }
  
-  return any_vectorized;
+  return vect_slp_region (region_begin, region_end, datarefs,
+			  &dataref_groups, insns);
  }
  
  
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 5466c78c20b..21881a9390c 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1917,14 +1917,15 @@  extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);
  extern opt_result vect_enhance_data_refs_alignment (loop_vec_info);
  extern opt_result vect_analyze_data_refs_alignment (loop_vec_info);
  extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
-extern opt_result vect_analyze_data_ref_accesses (vec_info *);
+extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
  extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
  extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
  				      tree, int, internal_fn *, tree *);
  extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
  				       gather_scatter_info *);
  extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
-						 vec<data_reference_p> *);
+						 vec<data_reference_p> *,
+						 vec<int> *, int);
  extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *);
  extern void vect_record_base_alignments (vec_info *);
  extern tree vect_create_data_ref_ptr (vec_info *,