[3/4,committed] arc: Update ctz/clz patterns

Message ID 20210510063136.906608-3-claziss@synopsys.com
State New
Headers show
Series
  • [1/4,committed] arc: Fix documentation __builtin_arc_sr
Related show

Commit Message

The ARCv2 ISA introduces dedicated instructions (fls/ffs) that can be
used to implement clz/ctz.  This patch adds support for them when
available.

Corner case (zero input):
mov            r0,0x0 : (w0) r0 <= 0x00000000 *
ffs            r1,r0 : (w0) r1 <= 0x0000001f *
fls            r2,r0 : (w0) r2 <= 0x00000000 *

gcc/
2021-05-10  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.h (CLZ_DEFINED_VALUE_AT_ZERO): Define.
	(CTZ_DEFINED_VALUE_AT_ZERO): Likewise.
	* config/arc/arc.md (clrsbsi2): Cleanup pattern.
	(norm_f): Likewise.
	(ffs): Likewise.
	(ffs_f): Likewise.
	(clzsi2): Use fls instruction when available.
	(ctzsi2): Use ffs instruction when available.

Signed-off-by: Claudiu Zissulescu <claziss@synopsys.com>

---
 gcc/config/arc/arc.h  |  6 +++++
 gcc/config/arc/arc.md | 53 ++++++++++++++++++++++++-------------------
 2 files changed, 36 insertions(+), 23 deletions(-)

-- 
2.30.2

Patch

diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index b7b34734f24..bd1fe0abd7a 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -1445,6 +1445,12 @@  do { \
 */
 #define SHIFT_COUNT_TRUNCATED 1
 
+/* clz of zero is reported as 31; 2 means RTL code and optab both honor it.  */
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)  ((VALUE) = 31, 2)
+
+/* ctz of zero is reported as 31; 2 means RTL code and optab both honor it.  */
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)  ((VALUE) = 31, 2)
+
 /* We assume that the store-condition-codes instructions store 0 for false
    and some other value for true.  This is the value stored for true.  */
 #define STORE_FLAG_VALUE 1
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 7a52551eef5..f3efe65ca2f 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -4396,24 +4396,20 @@  (define_peephole2
 ;; Instructions generated through builtins
 
 (define_insn "clrsbsi2"
-  [(set (match_operand:SI  0 "dest_reg_operand" "=w,w")
-	(clrsb:SI (match_operand:SI 1 "general_operand" "cL,Cal")))]
+  [(set (match_operand:SI  0 "dest_reg_operand" "=r,r")
+	(clrsb:SI (match_operand:SI 1 "general_operand" "rL,Cal")))]
   "TARGET_NORM"
-  "@
-   norm \t%0, %1
-   norm \t%0, %1"
+  "norm\\t%0,%1"
   [(set_attr "length" "4,8")
    (set_attr "type" "two_cycle_core,two_cycle_core")])
 
 (define_insn "norm_f"
-  [(set (match_operand:SI  0 "dest_reg_operand" "=w,w")
-	(clrsb:SI (match_operand:SI 1 "general_operand" "cL,Cal")))
+  [(set (match_operand:SI  0 "dest_reg_operand" "=r,r")
+	(clrsb:SI (match_operand:SI 1 "general_operand" "rL,Cal")))
    (set (reg:CC_ZN CC_REG)
 	(compare:CC_ZN (match_dup 1) (const_int 0)))]
   "TARGET_NORM"
-  "@
-   norm.f\t%0, %1
-   norm.f\t%0, %1"
+  "norm.f\\t%0,%1"
   [(set_attr "length" "4,8")
    (set_attr "type" "two_cycle_core,two_cycle_core")])
 
@@ -4443,7 +4439,17 @@  (define_expand "clzsi2"
 	  (clz:SI (match_operand:SI 1 "register_operand" "")))
      (clobber (match_dup 2))])]
   "TARGET_NORM"
-  "operands[2] = gen_rtx_REG (CC_ZNmode, CC_REG);")
+  "
+   if (TARGET_V2)
+    {
+      /* ARCv2's FLS is a bit more optimal than using norm.  */
+      rtx tmp = gen_reg_rtx (SImode);
+      emit_insn (gen_fls (tmp, operands[1]));
+      emit_insn (gen_subsi3 (operands[0], GEN_INT (31), tmp));
+      DONE;
+    }
+   operands[2] = gen_rtx_REG (CC_ZNmode, CC_REG);
+  ")
 
 (define_insn_and_split "*arc_clzsi2"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -4475,8 +4481,13 @@  (define_expand "ctzsi2"
    (match_operand:SI 1 "register_operand" "")]
   "TARGET_NORM"
   "
-  emit_insn (gen_arc_ctzsi2 (operands[0], operands[1]));
-  DONE;
+   if (TARGET_V2)
+    {
+      emit_insn (gen_ffs (operands[0], operands[1]));
+      DONE;
+    }
+   emit_insn (gen_arc_ctzsi2 (operands[0], operands[1]));
+   DONE;
 ")
 
 (define_insn_and_split "arc_ctzsi2"
@@ -5575,26 +5586,22 @@  (define_insn "clri"
    (set_attr "type" "misc")])
 
 (define_insn "ffs"
-  [(set (match_operand:SI  0 "dest_reg_operand" "=w,w")
-	(unspec:SI [(match_operand:SI 1 "general_operand" "cL,Cal")]
+  [(set (match_operand:SI  0 "dest_reg_operand" "=r,r")
+	(unspec:SI [(match_operand:SI 1 "general_operand" "rL,Cal")]
 			    UNSPEC_ARC_FFS))]
   "TARGET_NORM && TARGET_V2"
-  "@
-   ffs \t%0, %1
-   ffs \t%0, %1"
+  "ffs\\t%0,%1"
   [(set_attr "length" "4,8")
    (set_attr "type" "two_cycle_core,two_cycle_core")])
 
 (define_insn "ffs_f"
-  [(set (match_operand:SI  0 "dest_reg_operand" "=w,w")
-	(unspec:SI [(match_operand:SI 1 "general_operand" "cL,Cal")]
+  [(set (match_operand:SI  0 "dest_reg_operand" "=r,r")
+	(unspec:SI [(match_operand:SI 1 "general_operand" "rL,Cal")]
 			    UNSPEC_ARC_FFS))
    (set (reg:CC_ZN CC_REG)
 	(compare:CC_ZN (match_dup 1) (const_int 0)))]
   "TARGET_NORM && TARGET_V2"
-  "@
-   ffs.f\t%0, %1
-   ffs.f\t%0, %1"
+  "ffs.f\\t%0,%1"
   [(set_attr "length" "4,8")
    (set_attr "type" "two_cycle_core,two_cycle_core")])