@@ -11084,37 +11084,73 @@ riscv_set_is_shNadduw (rtx set)
1108411084 && REG_P (SET_DEST (set)));
1108511085}
1108611086
11087+ /* Return TRUE if the target microarchitecture supports macro-op
11088+ fusion for two memory operations of mode MODE (the direction
11089+ of transfer is determined by the IS_LOAD parameter).
+
+ When riscv_is_micro_arch (arcv_rhx100) is false, no restriction is
+ applied here and TRUE is returned for every MODE.  Otherwise:
+   - loads (IS_LOAD true) are accepted in SImode, HImode and QImode;
+   - stores (IS_LOAD false) are accepted in SImode only.
+ Any other combination yields FALSE. */
11090+
11091+ static bool
11092+ pair_fusion_mode_allowed_p (machine_mode mode, bool is_load)
11093+ {
11094+ if (!riscv_is_micro_arch (arcv_rhx100))
11095+ return true; /* Mode filtering below only applies to arcv_rhx100. */
11096+
11097+ return ((is_load && (mode == SImode
11098+ || mode == HImode
11099+ || mode == QImode))
11100+ || (!is_load && mode == SImode)); /* Stores: SImode only. */
11101+ }
11102+
1108711103/* Return TRUE if two addresses can be fused. */
1108811104
1108911105static bool
11090- arcv_fused_addr_p (rtx addr0, rtx addr1)
11106+ arcv_fused_addr_p (rtx addr0, rtx addr1, bool is_load )
1109111107{
1109211108 rtx base0, base1, tmp;
1109311109 HOST_WIDE_INT off0 = 0, off1 = 0;
1109411110
11095- if (GET_CODE (addr0) == PLUS)
11111+ if (GET_CODE (addr0) == SIGN_EXTEND || GET_CODE (addr0) == ZERO_EXTEND)
11112+ addr0 = XEXP (addr0, 0);
11113+
11114+ if (GET_CODE (addr1) == SIGN_EXTEND || GET_CODE (addr1) == ZERO_EXTEND)
11115+ addr1 = XEXP (addr1, 0);
11116+
11117+ if (!MEM_P (addr0) || !MEM_P (addr1))
11118+ return false;
11119+
11120+ /* Require the accesses to have the same mode. */
11121+ if (GET_MODE (addr0) != GET_MODE (addr1))
11122+ return false;
11123+
11124+ /* Check if the mode is allowed. */
11125+ if (!pair_fusion_mode_allowed_p (GET_MODE (addr0), is_load))
11126+ return false;
11127+
11128+ rtx reg0 = XEXP (addr0, 0);
11129+ rtx reg1 = XEXP (addr1, 0);
11130+
11131+ if (GET_CODE (reg0) == PLUS)
1109611132 {
11097- base0 = XEXP (addr0 , 0);
11098- tmp = XEXP (addr0 , 1);
11133+ base0 = XEXP (reg0 , 0);
11134+ tmp = XEXP (reg0 , 1);
1109911135 if (!CONST_INT_P (tmp))
1110011136 return false;
1110111137 off0 = INTVAL (tmp);
1110211138 }
11103- else if (REG_P (addr0 ))
11104- base0 = addr0 ;
11139+ else if (REG_P (reg0 ))
11140+ base0 = reg0 ;
1110511141 else
1110611142 return false;
1110711143
11108- if (GET_CODE (addr1 ) == PLUS)
11144+ if (GET_CODE (reg1 ) == PLUS)
1110911145 {
11110- base1 = XEXP (addr1 , 0);
11111- tmp = XEXP (addr1 , 1);
11146+ base1 = XEXP (reg1 , 0);
11147+ tmp = XEXP (reg1 , 1);
1111211148 if (!CONST_INT_P (tmp))
1111311149 return false;
1111411150 off1 = INTVAL (tmp);
1111511151 }
11116- else if (REG_P (addr1 ))
11117- base1 = addr1 ;
11152+ else if (REG_P (reg1 ))
11153+ base1 = reg1 ;
1111811154 else
1111911155 return false;
1112011156
@@ -11123,9 +11159,9 @@ arcv_fused_addr_p (rtx addr0, rtx addr1)
1112311159 if (REGNO (base0) != REGNO (base1))
1112411160 return false;
1112511161
11126- /* Offsets have to be aligned to word boundary and adjacent in memory,
11127- but the memory operations can be narrower. */
11128- if ((off0 % UNITS_PER_WORD == 0) && (abs (off1 - off0) == UNITS_PER_WORD ))
11162+ /* Fuse adjacent aligned addresses. */
11163+ if ((off0 % GET_MODE_SIZE (GET_MODE (addr0)).to_constant () == 0)
11164+ && (abs (off1 - off0) == GET_MODE_SIZE (GET_MODE (addr0)).to_constant () ))
1112911165 return true;
1113011166
1113111167 return false;
@@ -11278,20 +11314,14 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1127811314 if (get_attr_type (prev) == TYPE_LOAD
1127911315 && get_attr_type (curr) == TYPE_LOAD)
1128011316 {
11281- rtx addr0 = XEXP (SET_SRC (prev_set), 0);
11282- rtx addr1 = XEXP (SET_SRC (curr_set), 0);
11283-
11284- if (arcv_fused_addr_p (addr0, addr1))
11317+ if (arcv_fused_addr_p (SET_SRC (prev_set), SET_SRC (curr_set), true))
1128511318 return true;
1128611319 }
1128711320
1128811321 if (get_attr_type (prev) == TYPE_STORE
1128911322 && get_attr_type (curr) == TYPE_STORE)
1129011323 {
11291- rtx addr0 = XEXP (SET_DEST (prev_set), 0);
11292- rtx addr1 = XEXP (SET_DEST (curr_set), 0);
11293-
11294- if (arcv_fused_addr_p (addr0, addr1))
11324+ if (arcv_fused_addr_p (SET_DEST (prev_set), SET_DEST (curr_set), false))
1129511325 return true;
1129611326 }
1129711327
@@ -11301,21 +11331,19 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1130111331 && get_attr_type (curr) == TYPE_LOAD
1130211332 && get_attr_type (next_insn (curr)) == TYPE_LOAD)
1130311333 {
11304- rtx addr0 = XEXP (SET_SRC (curr_set), 0);
11305- rtx addr1 = XEXP (SET_SRC (single_set (next_insn (curr))), 0);
11306-
11307- if (arcv_fused_addr_p (addr0, addr1))
11334+ if (arcv_fused_addr_p (SET_SRC (curr_set),
11335+ SET_SRC (single_set (next_insn (curr))),
11336+ true))
1130811337 return false;
1130911338 }
1131011339
1131111340 if (next_insn (curr) && single_set (next_insn (curr))
1131211341 && get_attr_type (curr) == TYPE_STORE
1131311342 && get_attr_type (next_insn (curr)) == TYPE_STORE)
1131411343 {
11315- rtx addr0 = XEXP (SET_DEST (curr_set), 0);
11316- rtx addr1 = XEXP (SET_DEST (single_set (next_insn (curr))), 0);
11317-
11318- if (arcv_fused_addr_p (addr0, addr1))
11344+ if (arcv_fused_addr_p (SET_DEST (curr_set),
11345+ SET_DEST (single_set (next_insn (curr))),
11346+ false))
1131911347 return false;
1132011348 }
1132111349
@@ -11986,7 +12014,8 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1198612014 otherwise return FALSE. */
1198712015
1198812016static bool
11989- fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
12017+ fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, machine_mode *mode,
12018+ bool *is_load)
1199012019{
1199112020 rtx x, dest, src;
1199212021
@@ -11997,15 +12026,22 @@ fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
1199712026
1199812027 src = SET_SRC (x);
1199912028 dest = SET_DEST (x);
12029+
12030+ if ((GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
12031+ && MEM_P (XEXP (src, 0)))
12032+ src = XEXP (src, 0);
12033+
1200012034 if (REG_P (src) && MEM_P (dest))
1200112035 {
1200212036 *is_load = false;
12003- extract_base_offset_in_addr (dest, base, offset);
12037+ if (extract_base_offset_in_addr (dest, base, offset))
12038+ *mode = GET_MODE (dest);
1200412039 }
1200512040 else if (MEM_P (src) && REG_P (dest))
1200612041 {
1200712042 *is_load = true;
12008- extract_base_offset_in_addr (src, base, offset);
12043+ if (extract_base_offset_in_addr (src, base, offset))
12044+ *mode = GET_MODE (src);
1200912045 }
1201012046 else
1201112047 return false;
@@ -12020,11 +12056,13 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1202012056 int tmp, off_val;
1202112057 bool is_load;
1202212058 rtx base, offset;
12059+ machine_mode mode = SImode;
1202312060
1202412061 gcc_assert (INSN_P (insn));
1202512062
1202612063 tmp = max_pri - 1;
12027- if (!fusion_load_store (insn, &base, &offset, &is_load))
12064+ if (!fusion_load_store (insn, &base, &offset, &mode, &is_load)
12065+ || !pair_fusion_mode_allowed_p (mode, is_load))
1202812066 {
1202912067 *pri = tmp;
1203012068 *fusion_pri = tmp;
@@ -12033,6 +12071,11 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1203312071
1203412072 tmp /= 2;
1203512073
12074+ if (mode == HImode)
12075+ tmp /= 2;
12076+ else if (mode == QImode)
12077+ tmp /= 4;
12078+
1203612079 /* INSN with smaller base register goes first. */
1203712080 tmp -= ((REGNO (base) & 0xff) << 20);
1203812081
@@ -12041,7 +12084,9 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1204112084
1204212085 /* Put loads/stores operating on adjacent MODE-sized locations into
1204312086 * the same scheduling group. */
12044- *fusion_pri = tmp - ((off_val / (UNITS_PER_WORD * 2)) << 1) + is_load;
12087+ *fusion_pri = tmp
12088+ - ((off_val / (GET_MODE_SIZE (mode).to_constant () * 2)) << 1)
12089+ + is_load;
1204512090
1204612091 if (off_val >= 0)
1204712092 tmp -= (off_val & 0xfffff);
0 commit comments