RISC-V: Enable overlap-by-pieces via tune param

Message ID 20210721233245.3301523-1-cmuellner@gcc.gnu.org
State New
Headers show
Series
  • RISC-V: Enable overlap-by-pieces via tune param
Related show

Commit Message

Feng Xue OS via Gcc-patches July 21, 2021, 11:32 p.m.
This patch adds the field overlap_op_by_pieces to the struct
riscv_tune_param, which allows enabling the overlap_op_by_pieces
feature of the by-pieces infrastructure.

gcc/ChangeLog:

	* config/riscv/riscv.c (struct riscv_tune_param): New field.
	(riscv_overlap_op_by_pieces): New function.
	(TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
	riscv_overlap_op_by_pieces.

Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>

---
 gcc/config/riscv/riscv.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

-- 
2.31.1

Comments

Feng Xue OS via Gcc-patches July 22, 2021, 8:52 a.m. | #1
It's my first time seeing this hook :p Would you mind describing when we
need to set it to true?
I mean, is there some CPU feature that means we can/should set it to true?


On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>

> This patch adds the field overlap_op_by_pieces to the struct

> riscv_tune_param, which allows to enable the overlap_op_by_pieces

> feature of the by-pieces infrastructure.

>

> gcc/ChangeLog:

>

>         * config/riscv/riscv.c (struct riscv_tune_param): New field.

>         (riscv_overlap_op_by_pieces): New function.

>         (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to

>         riscv_overlap_op_by_pieces.

>

> Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>

> ---

>  gcc/config/riscv/riscv.c | 14 ++++++++++++++

>  1 file changed, 14 insertions(+)

>

> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c

> index 576960bb37c..824e930ef05 100644

> --- a/gcc/config/riscv/riscv.c

> +++ b/gcc/config/riscv/riscv.c

> @@ -220,6 +220,7 @@ struct riscv_tune_param

>    unsigned short branch_cost;

>    unsigned short memory_cost;

>    bool slow_unaligned_access;

> +  bool overlap_op_by_pieces;

>  };

>

>  /* Information about one micro-arch we know about.  */

> @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {

>    3,                                           /* branch_cost */

>    5,                                           /* memory_cost */

>    true,                                                /* slow_unaligned_access */

> +  false,                                       /* overlap_op_by_pieces */

>  };

>

>  /* Costs to use when optimizing for Sifive 7 Series.  */

> @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {

>    4,                                           /* branch_cost */

>    3,                                           /* memory_cost */

>    true,                                                /* slow_unaligned_access */

> +  false,                                       /* overlap_op_by_pieces */

>  };

>

>  /* Costs to use when optimizing for T-HEAD c906.  */

> @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {

>    3,            /* branch_cost */

>    5,            /* memory_cost */

>    false,            /* slow_unaligned_access */

> +  false,                                       /* overlap_op_by_pieces */

>  };

>

>  /* Costs to use when optimizing for size.  */

> @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {

>    1,                                           /* branch_cost */

>    2,                                           /* memory_cost */

>    false,                                       /* slow_unaligned_access */

> +  false,                                       /* overlap_op_by_pieces */

>  };

>

>  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);

> @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)

>    return riscv_slow_unaligned_access_p;

>  }

>

> +static bool

> +riscv_overlap_op_by_pieces (void)

> +{

> +  return tune_param->overlap_op_by_pieces;

> +}

> +

>  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

>

>  static bool

> @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)

>  #undef TARGET_SLOW_UNALIGNED_ACCESS

>  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access

>

> +#undef TARGET_OVERLAP_OP_BY_PIECES_P

> +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces

> +

>  #undef TARGET_SECONDARY_MEMORY_NEEDED

>  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed

>

> --

> 2.31.1

>
Feng Xue OS via Gcc-patches July 22, 2021, 9:20 a.m. | #2
On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:
>

> It's my first time seeing this hook :p Did you mind describing when we

> need to set it to true?

> I mean when a CPU has some feature then we can/should set it to true?


The by-pieces infrastructure allows inlining builtins quite well and
uses slow_unaligned_access and overlap_op_by_pieces to tune the
emitted instruction sequence.

In case unaligned accesses are fast, overlap_op_by_pieces can reduce
the number of instructions (emitted by by-pieces for e.g. memset) at the cost
of overlapping memory accesses.

E.g. you want to clear 15 bytes with memset (and unaligned accesses are cheap):
Without overlap_op_by_pieces you will get:
  8e:   00053023                sd      zero,0(a0)
  92:   00052423                sw      zero,8(a0)
  96:   00051623                sh      zero,12(a0)
  9a:   00050723                sb      zero,14(a0)
With overlap_op_by_pieces you will get:
  7e:   00053023                sd      zero,0(a0)
  82:   000533a3                sd      zero,7(a0)

BR
Christoph

>

>

> On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches

> <gcc-patches@gcc.gnu.org> wrote:

> >

> > This patch adds the field overlap_op_by_pieces to the struct

> > riscv_tune_param, which allows to enable the overlap_op_by_pieces

> > feature of the by-pieces infrastructure.

> >

> > gcc/ChangeLog:

> >

> >         * config/riscv/riscv.c (struct riscv_tune_param): New field.

> >         (riscv_overlap_op_by_pieces): New function.

> >         (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to

> >         riscv_overlap_op_by_pieces.

> >

> > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>

> > ---

> >  gcc/config/riscv/riscv.c | 14 ++++++++++++++

> >  1 file changed, 14 insertions(+)

> >

> > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c

> > index 576960bb37c..824e930ef05 100644

> > --- a/gcc/config/riscv/riscv.c

> > +++ b/gcc/config/riscv/riscv.c

> > @@ -220,6 +220,7 @@ struct riscv_tune_param

> >    unsigned short branch_cost;

> >    unsigned short memory_cost;

> >    bool slow_unaligned_access;

> > +  bool overlap_op_by_pieces;

> >  };

> >

> >  /* Information about one micro-arch we know about.  */

> > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {

> >    3,                                           /* branch_cost */

> >    5,                                           /* memory_cost */

> >    true,                                                /* slow_unaligned_access */

> > +  false,                                       /* overlap_op_by_pieces */

> >  };

> >

> >  /* Costs to use when optimizing for Sifive 7 Series.  */

> > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {

> >    4,                                           /* branch_cost */

> >    3,                                           /* memory_cost */

> >    true,                                                /* slow_unaligned_access */

> > +  false,                                       /* overlap_op_by_pieces */

> >  };

> >

> >  /* Costs to use when optimizing for T-HEAD c906.  */

> > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {

> >    3,            /* branch_cost */

> >    5,            /* memory_cost */

> >    false,            /* slow_unaligned_access */

> > +  false,                                       /* overlap_op_by_pieces */

> >  };

> >

> >  /* Costs to use when optimizing for size.  */

> > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {

> >    1,                                           /* branch_cost */

> >    2,                                           /* memory_cost */

> >    false,                                       /* slow_unaligned_access */

> > +  false,                                       /* overlap_op_by_pieces */

> >  };

> >

> >  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);

> > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)

> >    return riscv_slow_unaligned_access_p;

> >  }

> >

> > +static bool

> > +riscv_overlap_op_by_pieces (void)

> > +{

> > +  return tune_param->overlap_op_by_pieces;

> > +}

> > +

> >  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

> >

> >  static bool

> > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)

> >  #undef TARGET_SLOW_UNALIGNED_ACCESS

> >  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access

> >

> > +#undef TARGET_OVERLAP_OP_BY_PIECES_P

> > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces

> > +

> >  #undef TARGET_SECONDARY_MEMORY_NEEDED

> >  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed

> >

> > --

> > 2.31.1

> >
Feng Xue OS via Gcc-patches July 22, 2021, 9:28 a.m. | #3
Sounds like we could just use !tune_param->slow_unaligned_access for
TARGET_OVERLAP_OP_BY_PIECES_P?
since it improves both performance and code size if we have cheap
unaligned accesses.

On Thu, Jul 22, 2021 at 5:23 PM Christoph Müllner via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>

> On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:

> >

> > It's my first time seeing this hook :p Did you mind describing when we

> > need to set it to true?

> > I mean when a CPU has some feature then we can/should set it to true?

>

> The by-pieces infrastructure allows to inline builtins quite well and

> uses slow_unaligned_access and overlap_op_by_pieces to tune the

> emitted instruction sequence.

>

> In case unaligned accesses are fast, then overlap_op_by_pieces can reduce

> the number of instructions (emitted by by-pieces for e.g. memset) for the cost

> of overlapping memory accesses.

>

> E.g. you want to clear 15-bytes with memset (and cheap unaligned accesses):

> Without overlap_op_by_pieces you will get:

>   8e:   00053023                sd      zero,0(a0)

>   92:   00052423                sw      zero,8(a0)

>   96:   00051623                sh      zero,12(a0)

>   9a:   00050723                sb      zero,14(a0)

> With overlap_op_by_pieces you will get:

>   7e:   00053023                sd      zero,0(a0)

>   82:   000533a3                sd      zero,7(a0)

>

> BR

> Christoph

>

> >

> >

> > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches

> > <gcc-patches@gcc.gnu.org> wrote:

> > >

> > > This patch adds the field overlap_op_by_pieces to the struct

> > > riscv_tune_param, which allows to enable the overlap_op_by_pieces

> > > feature of the by-pieces infrastructure.

> > >

> > > gcc/ChangeLog:

> > >

> > >         * config/riscv/riscv.c (struct riscv_tune_param): New field.

> > >         (riscv_overlap_op_by_pieces): New function.

> > >         (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to

> > >         riscv_overlap_op_by_pieces.

> > >

> > > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>

> > > ---

> > >  gcc/config/riscv/riscv.c | 14 ++++++++++++++

> > >  1 file changed, 14 insertions(+)

> > >

> > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c

> > > index 576960bb37c..824e930ef05 100644

> > > --- a/gcc/config/riscv/riscv.c

> > > +++ b/gcc/config/riscv/riscv.c

> > > @@ -220,6 +220,7 @@ struct riscv_tune_param

> > >    unsigned short branch_cost;

> > >    unsigned short memory_cost;

> > >    bool slow_unaligned_access;

> > > +  bool overlap_op_by_pieces;

> > >  };

> > >

> > >  /* Information about one micro-arch we know about.  */

> > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {

> > >    3,                                           /* branch_cost */

> > >    5,                                           /* memory_cost */

> > >    true,                                                /* slow_unaligned_access */

> > > +  false,                                       /* overlap_op_by_pieces */

> > >  };

> > >

> > >  /* Costs to use when optimizing for Sifive 7 Series.  */

> > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {

> > >    4,                                           /* branch_cost */

> > >    3,                                           /* memory_cost */

> > >    true,                                                /* slow_unaligned_access */

> > > +  false,                                       /* overlap_op_by_pieces */

> > >  };

> > >

> > >  /* Costs to use when optimizing for T-HEAD c906.  */

> > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {

> > >    3,            /* branch_cost */

> > >    5,            /* memory_cost */

> > >    false,            /* slow_unaligned_access */

> > > +  false,                                       /* overlap_op_by_pieces */

> > >  };

> > >

> > >  /* Costs to use when optimizing for size.  */

> > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {

> > >    1,                                           /* branch_cost */

> > >    2,                                           /* memory_cost */

> > >    false,                                       /* slow_unaligned_access */

> > > +  false,                                       /* overlap_op_by_pieces */

> > >  };

> > >

> > >  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);

> > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)

> > >    return riscv_slow_unaligned_access_p;

> > >  }

> > >

> > > +static bool

> > > +riscv_overlap_op_by_pieces (void)

> > > +{

> > > +  return tune_param->overlap_op_by_pieces;

> > > +}

> > > +

> > >  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

> > >

> > >  static bool

> > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)

> > >  #undef TARGET_SLOW_UNALIGNED_ACCESS

> > >  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access

> > >

> > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P

> > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces

> > > +

> > >  #undef TARGET_SECONDARY_MEMORY_NEEDED

> > >  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed

> > >

> > > --

> > > 2.31.1

> > >
Feng Xue OS via Gcc-patches July 22, 2021, 12:27 p.m. | #4
On Thu, Jul 22, 2021 at 11:29 AM Kito Cheng <kito.cheng@gmail.com> wrote:
>

> Sounds like we could just use !tune_param->slow_unaligned_access for

> TARGET_OVERLAP_OP_BY_PIECES_P?

> since it improves both performance and code size if we have cheap

> unaligned accesses.


Fine for me as well.
I'll prepare a v2 that enables overlap_op_by_pieces if
slow_unaligned_access==false.

>

> On Thu, Jul 22, 2021 at 5:23 PM Christoph Müllner via Gcc-patches

> <gcc-patches@gcc.gnu.org> wrote:

> >

> > On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:

> > >

> > > It's my first time seeing this hook :p Did you mind describing when we

> > > need to set it to true?

> > > I mean when a CPU has some feature then we can/should set it to true?

> >

> > The by-pieces infrastructure allows to inline builtins quite well and

> > uses slow_unaligned_access and overlap_op_by_pieces to tune the

> > emitted instruction sequence.

> >

> > In case unaligned accesses are fast, then overlap_op_by_pieces can reduce

> > the number of instructions (emitted by by-pieces for e.g. memset) for the cost

> > of overlapping memory accesses.

> >

> > E.g. you want to clear 15-bytes with memset (and cheap unaligned accesses):

> > Without overlap_op_by_pieces you will get:

> >   8e:   00053023                sd      zero,0(a0)

> >   92:   00052423                sw      zero,8(a0)

> >   96:   00051623                sh      zero,12(a0)

> >   9a:   00050723                sb      zero,14(a0)

> > With overlap_op_by_pieces you will get:

> >   7e:   00053023                sd      zero,0(a0)

> >   82:   000533a3                sd      zero,7(a0)

> >

> > BR

> > Christoph

> >

> > >

> > >

> > > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches

> > > <gcc-patches@gcc.gnu.org> wrote:

> > > >

> > > > This patch adds the field overlap_op_by_pieces to the struct

> > > > riscv_tune_param, which allows to enable the overlap_op_by_pieces

> > > > feature of the by-pieces infrastructure.

> > > >

> > > > gcc/ChangeLog:

> > > >

> > > >         * config/riscv/riscv.c (struct riscv_tune_param): New field.

> > > >         (riscv_overlap_op_by_pieces): New function.

> > > >         (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to

> > > >         riscv_overlap_op_by_pieces.

> > > >

> > > > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>

> > > > ---

> > > >  gcc/config/riscv/riscv.c | 14 ++++++++++++++

> > > >  1 file changed, 14 insertions(+)

> > > >

> > > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c

> > > > index 576960bb37c..824e930ef05 100644

> > > > --- a/gcc/config/riscv/riscv.c

> > > > +++ b/gcc/config/riscv/riscv.c

> > > > @@ -220,6 +220,7 @@ struct riscv_tune_param

> > > >    unsigned short branch_cost;

> > > >    unsigned short memory_cost;

> > > >    bool slow_unaligned_access;

> > > > +  bool overlap_op_by_pieces;

> > > >  };

> > > >

> > > >  /* Information about one micro-arch we know about.  */

> > > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {

> > > >    3,                                           /* branch_cost */

> > > >    5,                                           /* memory_cost */

> > > >    true,                                                /* slow_unaligned_access */

> > > > +  false,                                       /* overlap_op_by_pieces */

> > > >  };

> > > >

> > > >  /* Costs to use when optimizing for Sifive 7 Series.  */

> > > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {

> > > >    4,                                           /* branch_cost */

> > > >    3,                                           /* memory_cost */

> > > >    true,                                                /* slow_unaligned_access */

> > > > +  false,                                       /* overlap_op_by_pieces */

> > > >  };

> > > >

> > > >  /* Costs to use when optimizing for T-HEAD c906.  */

> > > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {

> > > >    3,            /* branch_cost */

> > > >    5,            /* memory_cost */

> > > >    false,            /* slow_unaligned_access */

> > > > +  false,                                       /* overlap_op_by_pieces */

> > > >  };

> > > >

> > > >  /* Costs to use when optimizing for size.  */

> > > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {

> > > >    1,                                           /* branch_cost */

> > > >    2,                                           /* memory_cost */

> > > >    false,                                       /* slow_unaligned_access */

> > > > +  false,                                       /* overlap_op_by_pieces */

> > > >  };

> > > >

> > > >  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);

> > > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)

> > > >    return riscv_slow_unaligned_access_p;

> > > >  }

> > > >

> > > > +static bool

> > > > +riscv_overlap_op_by_pieces (void)

> > > > +{

> > > > +  return tune_param->overlap_op_by_pieces;

> > > > +}

> > > > +

> > > >  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

> > > >

> > > >  static bool

> > > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)

> > > >  #undef TARGET_SLOW_UNALIGNED_ACCESS

> > > >  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access

> > > >

> > > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P

> > > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces

> > > > +

> > > >  #undef TARGET_SECONDARY_MEMORY_NEEDED

> > > >  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed

> > > >

> > > > --

> > > > 2.31.1

> > > >
Feng Xue OS via Gcc-patches July 22, 2021, 12:54 p.m. | #5
On Thu, Jul 22, 2021 at 2:27 PM Christoph Müllner <cmuellner@gcc.gnu.org> wrote:
>

> On Thu, Jul 22, 2021 at 11:29 AM Kito Cheng <kito.cheng@gmail.com> wrote:

> >

> > Sounds like we could just use !tune_param->slow_unaligned_access for

> > TARGET_OVERLAP_OP_BY_PIECES_P?

> > since it improves both performance and code size if we have cheap

> > unaligned accesses.

>

> Fine for me as well.

> I'll prepare a v2, that uses enables overlap_op_by_pieces if

> slow_unaligned_access==false.


The new patch can be found here:
https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575832.html

>

> >

> > On Thu, Jul 22, 2021 at 5:23 PM Christoph Müllner via Gcc-patches

> > <gcc-patches@gcc.gnu.org> wrote:

> > >

> > > On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:

> > > >

> > > > It's my first time seeing this hook :p Did you mind describing when we

> > > > need to set it to true?

> > > > I mean when a CPU has some feature then we can/should set it to true?

> > >

> > > The by-pieces infrastructure allows to inline builtins quite well and

> > > uses slow_unaligned_access and overlap_op_by_pieces to tune the

> > > emitted instruction sequence.

> > >

> > > In case unaligned accesses are fast, then overlap_op_by_pieces can reduce

> > > the number of instructions (emitted by by-pieces for e.g. memset) for the cost

> > > of overlapping memory accesses.

> > >

> > > E.g. you want to clear 15-bytes with memset (and cheap unaligned accesses):

> > > Without overlap_op_by_pieces you will get:

> > >   8e:   00053023                sd      zero,0(a0)

> > >   92:   00052423                sw      zero,8(a0)

> > >   96:   00051623                sh      zero,12(a0)

> > >   9a:   00050723                sb      zero,14(a0)

> > > With overlap_op_by_pieces you will get:

> > >   7e:   00053023                sd      zero,0(a0)

> > >   82:   000533a3                sd      zero,7(a0)

> > >

> > > BR

> > > Christoph

> > >

> > > >

> > > >

> > > > On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches

> > > > <gcc-patches@gcc.gnu.org> wrote:

> > > > >

> > > > > This patch adds the field overlap_op_by_pieces to the struct

> > > > > riscv_tune_param, which allows to enable the overlap_op_by_pieces

> > > > > feature of the by-pieces infrastructure.

> > > > >

> > > > > gcc/ChangeLog:

> > > > >

> > > > >         * config/riscv/riscv.c (struct riscv_tune_param): New field.

> > > > >         (riscv_overlap_op_by_pieces): New function.

> > > > >         (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to

> > > > >         riscv_overlap_op_by_pieces.

> > > > >

> > > > > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>

> > > > > ---

> > > > >  gcc/config/riscv/riscv.c | 14 ++++++++++++++

> > > > >  1 file changed, 14 insertions(+)

> > > > >

> > > > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c

> > > > > index 576960bb37c..824e930ef05 100644

> > > > > --- a/gcc/config/riscv/riscv.c

> > > > > +++ b/gcc/config/riscv/riscv.c

> > > > > @@ -220,6 +220,7 @@ struct riscv_tune_param

> > > > >    unsigned short branch_cost;

> > > > >    unsigned short memory_cost;

> > > > >    bool slow_unaligned_access;

> > > > > +  bool overlap_op_by_pieces;

> > > > >  };

> > > > >

> > > > >  /* Information about one micro-arch we know about.  */

> > > > > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {

> > > > >    3,                                           /* branch_cost */

> > > > >    5,                                           /* memory_cost */

> > > > >    true,                                                /* slow_unaligned_access */

> > > > > +  false,                                       /* overlap_op_by_pieces */

> > > > >  };

> > > > >

> > > > >  /* Costs to use when optimizing for Sifive 7 Series.  */

> > > > > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {

> > > > >    4,                                           /* branch_cost */

> > > > >    3,                                           /* memory_cost */

> > > > >    true,                                                /* slow_unaligned_access */

> > > > > +  false,                                       /* overlap_op_by_pieces */

> > > > >  };

> > > > >

> > > > >  /* Costs to use when optimizing for T-HEAD c906.  */

> > > > > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {

> > > > >    3,            /* branch_cost */

> > > > >    5,            /* memory_cost */

> > > > >    false,            /* slow_unaligned_access */

> > > > > +  false,                                       /* overlap_op_by_pieces */

> > > > >  };

> > > > >

> > > > >  /* Costs to use when optimizing for size.  */

> > > > > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {

> > > > >    1,                                           /* branch_cost */

> > > > >    2,                                           /* memory_cost */

> > > > >    false,                                       /* slow_unaligned_access */

> > > > > +  false,                                       /* overlap_op_by_pieces */

> > > > >  };

> > > > >

> > > > >  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);

> > > > > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)

> > > > >    return riscv_slow_unaligned_access_p;

> > > > >  }

> > > > >

> > > > > +static bool

> > > > > +riscv_overlap_op_by_pieces (void)

> > > > > +{

> > > > > +  return tune_param->overlap_op_by_pieces;

> > > > > +}

> > > > > +

> > > > >  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

> > > > >

> > > > >  static bool

> > > > > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)

> > > > >  #undef TARGET_SLOW_UNALIGNED_ACCESS

> > > > >  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access

> > > > >

> > > > > +#undef TARGET_OVERLAP_OP_BY_PIECES_P

> > > > > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces

> > > > > +

> > > > >  #undef TARGET_SECONDARY_MEMORY_NEEDED

> > > > >  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed

> > > > >

> > > > > --

> > > > > 2.31.1

> > > > >

Patch

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 576960bb37c..824e930ef05 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -220,6 +220,7 @@  struct riscv_tune_param
   unsigned short branch_cost;
   unsigned short memory_cost;
   bool slow_unaligned_access;
+  bool overlap_op_by_pieces;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -285,6 +286,7 @@  static const struct riscv_tune_param rocket_tune_info = {
   3,						/* branch_cost */
   5,						/* memory_cost */
   true,						/* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -298,6 +300,7 @@  static const struct riscv_tune_param sifive_7_tune_info = {
   4,						/* branch_cost */
   3,						/* memory_cost */
   true,						/* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -311,6 +314,7 @@  static const struct riscv_tune_param thead_c906_tune_info = {
   3,            /* branch_cost */
   5,            /* memory_cost */
   false,            /* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -324,6 +328,7 @@  static const struct riscv_tune_param optimize_size_tune_info = {
   1,						/* branch_cost */
   2,						/* memory_cost */
   false,					/* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -5201,6 +5206,12 @@  riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -5525,6 +5536,9 @@  riscv_asan_shadow_offset (void)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed