@@ -10457,37 +10457,73 @@ riscv_set_is_shNadduw (rtx set)
1045710457 && REG_P (SET_DEST (set)));
1045810458}
1045910459
10460+ /* Return TRUE if two memory operations of mode MODE may be fused; IS_LOAD
10461+ selects loads (true) or stores (false). If riscv_is_micro_arch (arcv_rhx100)
10462+ is false, any mode passes; else loads need SI/HI/QImode, stores SImode. */
10463+
10464+ static bool
10465+ pair_fusion_mode_allowed_p (machine_mode mode, bool is_load)
10466+ {
10467+ if (!riscv_is_micro_arch (arcv_rhx100))
10468+ return true;
10469+
10470+ return ((is_load && (mode == SImode
10471+ || mode == HImode
10472+ || mode == QImode))
10473+ || (!is_load && mode == SImode));
10474+ }
10475+
1046010476/* Return TRUE if two addresses can be fused. */
1046110477
1046210478static bool
10463- arcv_fused_addr_p (rtx addr0, rtx addr1)
10479+ arcv_fused_addr_p (rtx addr0, rtx addr1, bool is_load )
1046410480{
1046510481 rtx base0, base1, tmp;
1046610482 HOST_WIDE_INT off0 = 0, off1 = 0;
1046710483
10468- if (GET_CODE (addr0) == PLUS)
10484+ if (GET_CODE (addr0) == SIGN_EXTEND || GET_CODE (addr0) == ZERO_EXTEND)
10485+ addr0 = XEXP (addr0, 0);
10486+
10487+ if (GET_CODE (addr1) == SIGN_EXTEND || GET_CODE (addr1) == ZERO_EXTEND)
10488+ addr1 = XEXP (addr1, 0);
10489+
10490+ if (!MEM_P (addr0) || !MEM_P (addr1))
10491+ return false;
10492+
10493+ /* Require the accesses to have the same mode. */
10494+ if (GET_MODE (addr0) != GET_MODE (addr1))
10495+ return false;
10496+
10497+ /* Check if the mode is allowed. */
10498+ if (!pair_fusion_mode_allowed_p (GET_MODE (addr0), is_load))
10499+ return false;
10500+
10501+ rtx reg0 = XEXP (addr0, 0);
10502+ rtx reg1 = XEXP (addr1, 0);
10503+
10504+ if (GET_CODE (reg0) == PLUS)
1046910505 {
10470- base0 = XEXP (addr0 , 0);
10471- tmp = XEXP (addr0 , 1);
10506+ base0 = XEXP (reg0 , 0);
10507+ tmp = XEXP (reg0 , 1);
1047210508 if (!CONST_INT_P (tmp))
1047310509 return false;
1047410510 off0 = INTVAL (tmp);
1047510511 }
10476- else if (REG_P (addr0 ))
10477- base0 = addr0 ;
10512+ else if (REG_P (reg0 ))
10513+ base0 = reg0 ;
1047810514 else
1047910515 return false;
1048010516
10481- if (GET_CODE (addr1 ) == PLUS)
10517+ if (GET_CODE (reg1 ) == PLUS)
1048210518 {
10483- base1 = XEXP (addr1 , 0);
10484- tmp = XEXP (addr1 , 1);
10519+ base1 = XEXP (reg1 , 0);
10520+ tmp = XEXP (reg1 , 1);
1048510521 if (!CONST_INT_P (tmp))
1048610522 return false;
1048710523 off1 = INTVAL (tmp);
1048810524 }
10489- else if (REG_P (addr1 ))
10490- base1 = addr1 ;
10525+ else if (REG_P (reg1 ))
10526+ base1 = reg1 ;
1049110527 else
1049210528 return false;
1049310529
@@ -10496,9 +10532,9 @@ arcv_fused_addr_p (rtx addr0, rtx addr1)
1049610532 if (REGNO (base0) != REGNO (base1))
1049710533 return false;
1049810534
10499- /* Offsets have to be aligned to word boundary and adjacent in memory,
10500- but the memory operations can be narrower. */
10501- if ((off0 % UNITS_PER_WORD == 0) && (abs (off1 - off0) == UNITS_PER_WORD ))
10535+ /* Fuse adjacent aligned addresses. */
10536+ if ((off0 % GET_MODE_SIZE (GET_MODE (addr0)).to_constant () == 0)
10537+ && (abs (off1 - off0) == GET_MODE_SIZE (GET_MODE (addr0)).to_constant () ))
1050210538 return true;
1050310539
1050410540 return false;
@@ -10651,20 +10687,14 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1065110687 if (get_attr_type (prev) == TYPE_LOAD
1065210688 && get_attr_type (curr) == TYPE_LOAD)
1065310689 {
10654- rtx addr0 = XEXP (SET_SRC (prev_set), 0);
10655- rtx addr1 = XEXP (SET_SRC (curr_set), 0);
10656-
10657- if (arcv_fused_addr_p (addr0, addr1))
10690+ if (arcv_fused_addr_p (SET_SRC (prev_set), SET_SRC (curr_set), true))
1065810691 return true;
1065910692 }
1066010693
1066110694 if (get_attr_type (prev) == TYPE_STORE
1066210695 && get_attr_type (curr) == TYPE_STORE)
1066310696 {
10664- rtx addr0 = XEXP (SET_DEST (prev_set), 0);
10665- rtx addr1 = XEXP (SET_DEST (curr_set), 0);
10666-
10667- if (arcv_fused_addr_p (addr0, addr1))
10697+ if (arcv_fused_addr_p (SET_DEST (prev_set), SET_DEST (curr_set), false))
1066810698 return true;
1066910699 }
1067010700
@@ -10674,21 +10704,19 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1067410704 && get_attr_type (curr) == TYPE_LOAD
1067510705 && get_attr_type (next_insn (curr)) == TYPE_LOAD)
1067610706 {
10677- rtx addr0 = XEXP (SET_SRC (curr_set), 0);
10678- rtx addr1 = XEXP (SET_SRC (single_set (next_insn (curr))), 0);
10679-
10680- if (arcv_fused_addr_p (addr0, addr1))
10707+ if (arcv_fused_addr_p (SET_SRC (curr_set),
10708+ SET_SRC (single_set (next_insn (curr))),
10709+ true))
1068110710 return false;
1068210711 }
1068310712
1068410713 if (next_insn (curr) && single_set (next_insn (curr))
1068510714 && get_attr_type (curr) == TYPE_STORE
1068610715 && get_attr_type (next_insn (curr)) == TYPE_STORE)
1068710716 {
10688- rtx addr0 = XEXP (SET_DEST (curr_set), 0);
10689- rtx addr1 = XEXP (SET_DEST (single_set (next_insn (curr))), 0);
10690-
10691- if (arcv_fused_addr_p (addr0, addr1))
10717+ if (arcv_fused_addr_p (SET_DEST (curr_set),
10718+ SET_DEST (single_set (next_insn (curr))),
10719+ false))
1069210720 return false;
1069310721 }
1069410722
@@ -11359,7 +11387,8 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1135911387 otherwise return FALSE. */
1136011388
1136111389static bool
11362- fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
11390+ fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, machine_mode *mode,
11391+ bool *is_load)
1136311392{
1136411393 rtx x, dest, src;
1136511394
@@ -11370,15 +11399,22 @@ fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
1137011399
1137111400 src = SET_SRC (x);
1137211401 dest = SET_DEST (x);
11402+
11403+ if ((GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
11404+ && MEM_P (XEXP (src, 0)))
11405+ src = XEXP (src, 0);
11406+
1137311407 if (REG_P (src) && MEM_P (dest))
1137411408 {
1137511409 *is_load = false;
11376- extract_base_offset_in_addr (dest, base, offset);
11410+ if (extract_base_offset_in_addr (dest, base, offset))
11411+ *mode = GET_MODE (dest);
1137711412 }
1137811413 else if (MEM_P (src) && REG_P (dest))
1137911414 {
1138011415 *is_load = true;
11381- extract_base_offset_in_addr (src, base, offset);
11416+ if (extract_base_offset_in_addr (src, base, offset))
11417+ *mode = GET_MODE (src);
1138211418 }
1138311419 else
1138411420 return false;
@@ -11393,11 +11429,13 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1139311429 int tmp, off_val;
1139411430 bool is_load;
1139511431 rtx base, offset;
11432+ machine_mode mode = SImode;
1139611433
1139711434 gcc_assert (INSN_P (insn));
1139811435
1139911436 tmp = max_pri - 1;
11400- if (!fusion_load_store (insn, &base, &offset, &is_load))
11437+ if (!fusion_load_store (insn, &base, &offset, &mode, &is_load)
11438+ || !pair_fusion_mode_allowed_p (mode, is_load))
1140111439 {
1140211440 *pri = tmp;
1140311441 *fusion_pri = tmp;
@@ -11406,6 +11444,11 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1140611444
1140711445 tmp /= 2;
1140811446
11447+ if (mode == HImode)
11448+ tmp /= 2;
11449+ else if (mode == QImode)
11450+ tmp /= 4;
11451+
1140911452 /* INSN with smaller base register goes first. */
1141011453 tmp -= ((REGNO (base) & 0xff) << 20);
1141111454
@@ -11414,7 +11457,9 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1141411457
1141511458 /* Put loads/stores operating on adjacent words into the same
1141611459 * scheduling group. */
11417- *fusion_pri = tmp - ((off_val / (UNITS_PER_WORD * 2)) << 1) + is_load;
11460+ *fusion_pri = tmp
11461+ - ((off_val / (GET_MODE_SIZE (mode).to_constant () * 2)) << 1)
11462+ + is_load;
1141811463
1141911464 if (off_val >= 0)
1142011465 tmp -= (off_val & 0xfffff);
0 commit comments