[RISCV] Update the ProcResource used by vsetvli in SiFiveP400 model #199802
[RISCV] Update the ProcResource used by vsetvli in SiFiveP400 model #199802 — mshockwave wants to merge 3 commits into
Conversation
Co-Authored-By: Craig Topper <craig.topper@sifive.com>
Co-Authored-By: Craig Topper <craig.topper@sifive.com>
|
@llvm/pr-subscribers-backend-risc-v — Author: Min-Yih Hsu (mshockwave). Changes: Stacks on top of #199796. The vsetvli instruction (and friends) should use the first integer pipe (i.e. SiFiveP400IEXQ0). Patch is 5.92 MiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/199802.diff — 25 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive8.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive8.td
new file mode 100644
index 0000000000000..3085f078fc40c
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive8.td
@@ -0,0 +1,862 @@
+//==----- RISCVSchedSiFive8.td - SiFive8 Scheduling Defs -----*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the common bits of scheduling models for P400, P500, P600,
+// and P800 series cores.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_RISCV_SIFIVE_RISCVSCHEDSIFIVE8_TD
+#define LLVM_TARGET_RISCV_SIFIVE_RISCVSCHEDSIFIVE8_TD
+
+// Declares the processor resources shared by the SiFive8 family models.
+//   num_iex             - number of integer pipes (IEXQ0 .. IEXQ<num_iex-1>).
+//   num_fex             - number of floating point pipes (FEXQ*).
+//   num_vex             - number of vector pipes (VEXQ*); 0 omits every
+//                         vector-only resource.
+//   shared_ldst_pipes   - define one two-unit LDST resource instead of
+//                         separate Load and Store resources.
+//   dedicated_ld_pipe   - additionally define an LD resource.
+//   num_branch_only_iex - number of trailing integer pipes that are left out
+//                         of the IntArith group.
+multiclass SiFive8ProcResources<int num_iex, int num_fex, int num_vex = 1,
+                                bit shared_ldst_pipes = false,
+                                bit dedicated_ld_pipe = false,
+                                int num_branch_only_iex = 0> {
+  // Integer pipes
+  foreach pipe = !range(num_iex) in
+    def IEXQ#pipe : ProcResource<1>;
+
+  // The arithmetic group only exists when there is more than one integer
+  // pipe; it spans the first (num_iex - num_branch_only_iex) pipes.
+  if !gt(num_iex, 1) then {
+    def IntArith
+        : ProcResGroup<!foreach(pipe,
+                                !range(!sub(num_iex, num_branch_only_iex)),
+                                !cast<ProcResource>(NAME#"IEXQ"#pipe))>;
+  }
+  // The integer divider is always present.
+  def Div : ProcResource<1>;
+
+  // Floating point pipes
+  foreach pipe = !range(num_fex) in
+    def FEXQ#pipe : ProcResource<1>;
+
+  if !gt(num_fex, 1) then {
+    def FloatArith
+        : ProcResGroup<!foreach(pipe, !range(num_fex),
+                                !cast<ProcResource>(NAME#"FEXQ"#pipe))>;
+  }
+  def FloatDiv : ProcResource<1>;
+
+  // Load/Store units: either split Load / Store resources or one shared
+  // two-unit LDST resource.
+  if !not(shared_ldst_pipes) then {
+    def Load : ProcResource<1>;
+    def Store : ProcResource<1>;
+  } else {
+    def LDST : ProcResource<2>;
+  }
+
+  if dedicated_ld_pipe then {
+    def LD : ProcResource<1>;
+  }
+
+  // Vector pipes
+  foreach pipe = !range(num_vex) in
+    def VEXQ#pipe : ProcResource<1>;
+
+  if !gt(num_vex, 1) then {
+    def VectorArith
+        : ProcResGroup<!foreach(pipe, !range(num_vex),
+                                !cast<ProcResource>(NAME#"VEXQ"#pipe))>;
+  }
+
+  // Vector load/store and divide resources exist only with a vector unit.
+  if !gt(num_vex, 0) then {
+    def VLD : ProcResource<1>;
+    def VST : ProcResource<1>;
+    def VDiv : ProcResource<1>;
+    def VFloatDiv : ProcResource<1>;
+  }
+}
+
+// Bundle of integer-side processor resources that a concrete model hands to
+// the shared WriteRes multiclasses in this file.
+class SiFive8IntProcResources {
+ // ALU, shift, rotate and other single-resource bit-manipulation writes.
+ ProcResourceKind Arith;
+ // Integer multiply and cpop[w].
+ ProcResourceKind Mul;
+ // Integer-to-FP conversions and moves, including FLI.
+ ProcResourceKind I2F;
+ // Resources consumed by integer divide and remainder.
+ list<ProcResourceKind> Div;
+ // Jumps; also the first resource of the conditional move pseudo.
+ ProcResourceKind Branch;
+ // CSR accesses and vlenb reads.
+ ProcResourceKind SYS;
+ // vsetvli / vsetivli / vsetvl.
+ ProcResourceKind VSet;
+ // Second resource consumed by the conditional move pseudo.
+ ProcResourceKind CMOV;
+}
+
+// Bundle of scalar floating point processor resources that a concrete model
+// hands to SiFive8BaseWriteRes.
+class SiFive8FloatProcResources {
+ // FMA, sign injection, min/max, FP-to-FP conversions and rounding.
+ ProcResourceKind Arith;
+ // Resources consumed by the FP square root writes.
+ list<ProcResourceKind> Div;
+ // FP-to-integer conversions and moves, classify and compare.
+ ProcResourceKind F2I;
+}
+
+// Bundle of scalar load/store processor resources that a concrete model hands
+// to SiFive8BaseWriteRes.
+class SiFive8LSUProcResources {
+ // Scalar integer and FP loads, plus WriteAtomic{W,D,LDW,LDD}.
+ ProcResourceKind Load;
+ // Scalar integer and FP stores, plus WriteAtomicST{W,D}.
+ ProcResourceKind Store;
+}
+
+// Bundle of vector processor resources that a concrete model hands to
+// SiFive8VecBaseWriteRes.
+class SiFive8VecProcResources {
+ // Vector arithmetic and whole-register moves.
+ ProcResourceKind Arith;
+ // Vector loads (unit-stride, strided, indexed, segmented, whole-register).
+ ProcResourceKind Load;
+ // Vector stores (unit-stride, strided, indexed, segmented, whole-register).
+ ProcResourceKind Store;
+ // Resources consumed by vector integer divide.
+ list<ProcResourceKind> Div;
+ // NOTE(review): presumably the resources for vector FP divide / sqrt; the
+ // uses are not visible in this part of the file -- confirm.
+ list<ProcResourceKind> FloatDiv;
+}
+
+// Vector register length used by the VLMAX computations in this file
+// (presumably VLEN in bits, since it is divided by SEW -- TODO confirm).
+defvar SiFive8VLEN = 128;
+
+// WriteRes entries for the scalar integer, floating point and memory
+// SchedWrites that are common to the SiFive8 family.
+//   IntProcRes / FloatProcRes / LSUProcRes - records supplying the concrete
+//     processor resources each SchedWrite is bound to.
+//   legacy_imul_latency - when set, integer multiply and cpop use a latency
+//     of 3 instead of 2.
+// Entries without an explicit Latency keep the WriteRes default.
+multiclass SiFive8BaseWriteRes<SiFive8IntProcResources IntProcRes,
+                               SiFive8FloatProcResources FloatProcRes,
+                               SiFive8LSUProcResources LSUProcRes,
+                               bit legacy_imul_latency = false> {
+
+  // Integer arithmetic and logic
+  def : WriteRes<WriteIALU, [IntProcRes.Arith]>;
+  def : WriteRes<WriteIALU32, [IntProcRes.Arith]>;
+  def : WriteRes<WriteShiftImm, [IntProcRes.Arith]>;
+  def : WriteRes<WriteShiftImm32, [IntProcRes.Arith]>;
+  def : WriteRes<WriteShiftReg, [IntProcRes.Arith]>;
+  def : WriteRes<WriteShiftReg32, [IntProcRes.Arith]>;
+
+  // Branching
+  def : WriteRes<WriteJmp, [IntProcRes.Branch]>;
+  def : WriteRes<WriteJal, [IntProcRes.Branch]>;
+  def : WriteRes<WriteJalr, [IntProcRes.Branch]>;
+
+  // CMOV: two micro-ops that occupy both the branch and the CMOV resources.
+  let Latency = 2, NumMicroOps = 2 in
+  def WriteCMOV : SchedWriteRes<[IntProcRes.Branch, IntProcRes.CMOV]>;
+  def : InstRW<[!cast<SchedWriteRes>(NAME#"WriteCMOV")], (instrs PseudoCCMOVGPRNoX0)>;
+
+  // Integer multiplication; cpop[w] look exactly like multiply.
+  let Latency = !if(legacy_imul_latency, 3, 2) in {
+    def : WriteRes<WriteIMul, [IntProcRes.Mul]>;
+    def : WriteRes<WriteIMul32, [IntProcRes.Mul]>;
+    def : WriteRes<WriteCPOP, [IntProcRes.Mul]>;
+    def : WriteRes<WriteCPOP32, [IntProcRes.Mul]>;
+  }
+
+  // Integer division. ReleaseAtCycles carries one entry per resource in
+  // IntProcRes.Div, so that list is expected to hold two resources.
+  let Latency = 35, ReleaseAtCycles = [1, 34] in
+  def : WriteRes<WriteIDiv, IntProcRes.Div>;
+  let Latency = 20, ReleaseAtCycles = [1, 19] in
+  def : WriteRes<WriteIDiv32, IntProcRes.Div>;
+
+  // Integer remainder: same timing as the matching division.
+  let Latency = 35, ReleaseAtCycles = [1, 34] in
+  def : WriteRes<WriteIRem, IntProcRes.Div>;
+  let Latency = 20, ReleaseAtCycles = [1, 19] in
+  def : WriteRes<WriteIRem32, IntProcRes.Div>;
+
+  // Bitmanip
+  def : WriteRes<WriteRotateImm, [IntProcRes.Arith]>;
+  def : WriteRes<WriteRotateImm32, [IntProcRes.Arith]>;
+  def : WriteRes<WriteRotateReg, [IntProcRes.Arith]>;
+  def : WriteRes<WriteRotateReg32, [IntProcRes.Arith]>;
+
+  def : WriteRes<WriteCLZ, [IntProcRes.Arith]>;
+  def : WriteRes<WriteCLZ32, [IntProcRes.Arith]>;
+  def : WriteRes<WriteCTZ, [IntProcRes.Arith]>;
+  def : WriteRes<WriteCTZ32, [IntProcRes.Arith]>;
+
+  def : WriteRes<WriteORCB, [IntProcRes.Arith]>;
+  def : WriteRes<WriteIMinMax, [IntProcRes.Arith]>;
+
+  def : WriteRes<WriteREV8, [IntProcRes.Arith]>;
+
+  def : WriteRes<WriteSHXADD, [IntProcRes.Arith]>;
+  def : WriteRes<WriteSHXADD32, [IntProcRes.Arith]>;
+
+  def : WriteRes<WriteSingleBit, [IntProcRes.Arith]>;
+  def : WriteRes<WriteSingleBitImm, [IntProcRes.Arith]>;
+  def : WriteRes<WriteBEXT, [IntProcRes.Arith]>;
+  def : WriteRes<WriteBEXTI, [IntProcRes.Arith]>;
+
+  // Memory: scalar integer and FP stores
+  def : WriteRes<WriteSTB, [LSUProcRes.Store]>;
+  def : WriteRes<WriteSTH, [LSUProcRes.Store]>;
+  def : WriteRes<WriteSTW, [LSUProcRes.Store]>;
+  def : WriteRes<WriteSTD, [LSUProcRes.Store]>;
+  def : WriteRes<WriteFST16, [LSUProcRes.Store]>;
+  def : WriteRes<WriteFST32, [LSUProcRes.Store]>;
+  def : WriteRes<WriteFST64, [LSUProcRes.Store]>;
+
+  // Memory: scalar integer loads, all widths share the same latency.
+  let Latency = 4 in {
+    def : WriteRes<WriteLDB, [LSUProcRes.Load]>;
+    def : WriteRes<WriteLDH, [LSUProcRes.Load]>;
+    def : WriteRes<WriteLDW, [LSUProcRes.Load]>;
+    def : WriteRes<WriteLDD, [LSUProcRes.Load]>;
+  }
+
+  // Memory: scalar FP loads
+  let Latency = 5 in {
+    def : WriteRes<WriteFLD16, [LSUProcRes.Load]>;
+    def : WriteRes<WriteFLD32, [LSUProcRes.Load]>;
+    def : WriteRes<WriteFLD64, [LSUProcRes.Load]>;
+  }
+
+  // Atomic memory
+  let Latency = 3 in {
+    def : WriteRes<WriteAtomicSTW, [LSUProcRes.Store]>;
+    def : WriteRes<WriteAtomicSTD, [LSUProcRes.Store]>;
+    def : WriteRes<WriteAtomicW, [LSUProcRes.Load]>;
+    def : WriteRes<WriteAtomicD, [LSUProcRes.Load]>;
+    def : WriteRes<WriteAtomicLDW, [LSUProcRes.Load]>;
+    def : WriteRes<WriteAtomicLDD, [LSUProcRes.Load]>;
+  }
+
+  // Floating point
+  let Latency = 4 in {
+    def : WriteRes<WriteFMA16, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFMA32, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFMA64, [FloatProcRes.Arith]>;
+  }
+
+  let Latency = 2 in {
+    def : WriteRes<WriteFSGNJ16, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFSGNJ32, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFSGNJ64, [FloatProcRes.Arith]>;
+
+    def : WriteRes<WriteFMinMax16, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFMinMax32, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFMinMax64, [FloatProcRes.Arith]>;
+  }
+
+  // Square root, half and single precision (identical timing).
+  let Latency = 18, ReleaseAtCycles = [1, 17] in {
+    def : WriteRes<WriteFSqrt16, FloatProcRes.Div>;
+    def : WriteRes<WriteFSqrt32, FloatProcRes.Div>;
+  }
+
+  // Square root, double precision.
+  let Latency = 33, ReleaseAtCycles = [1, 32] in
+  def : WriteRes<WriteFSqrt64, FloatProcRes.Div>;
+
+  // Conversions
+  let Latency = 2 in {
+    def : WriteRes<WriteFCvtI32ToF16, [IntProcRes.I2F]>;
+    def : WriteRes<WriteFCvtI32ToF32, [IntProcRes.I2F]>;
+    def : WriteRes<WriteFCvtI32ToF64, [IntProcRes.I2F]>;
+    def : WriteRes<WriteFCvtI64ToF16, [IntProcRes.I2F]>;
+    def : WriteRes<WriteFCvtI64ToF32, [IntProcRes.I2F]>;
+    def : WriteRes<WriteFCvtI64ToF64, [IntProcRes.I2F]>;
+    def : WriteRes<WriteFCvtF16ToI32, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFCvtF16ToI64, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFCvtF16ToF32, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFCvtF16ToF64, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFCvtF32ToI32, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFCvtF32ToI64, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFCvtF32ToF16, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFCvtF32ToF64, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFCvtF64ToI32, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFCvtF64ToI64, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFCvtF64ToF16, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFCvtF64ToF32, [FloatProcRes.Arith]>;
+    // FROUND/FROUNDNX are implemented by `FCVT.f.f`.
+    def : WriteRes<WriteFRoundF16, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFRoundF32, [FloatProcRes.Arith]>;
+    def : WriteRes<WriteFRoundF64, [FloatProcRes.Arith]>;
+
+    // Classify, compare and register-file moves.
+    def : WriteRes<WriteFClass16, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFClass32, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFClass64, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFCmp16, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFCmp32, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFCmp64, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFMovI16ToF16, [IntProcRes.I2F]>;
+    def : WriteRes<WriteFMovF16ToI16, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFMovI32ToF32, [IntProcRes.I2F]>;
+    def : WriteRes<WriteFMovF32ToI32, [FloatProcRes.F2I]>;
+    def : WriteRes<WriteFMovI64ToF64, [IntProcRes.I2F]>;
+    def : WriteRes<WriteFMovF64ToI64, [FloatProcRes.F2I]>;
+    // FLI is implemented by `FMV.f.x`.
+    def : WriteRes<WriteFLI16, [IntProcRes.I2F]>;
+    def : WriteRes<WriteFLI32, [IntProcRes.I2F]>;
+    def : WriteRes<WriteFLI64, [IntProcRes.I2F]>;
+  }
+
+  // Others
+  def : WriteRes<WriteCSR, [IntProcRes.SYS]>;
+  // Nops consume no processor resource.
+  def : WriteRes<WriteNop, []>;
+
+  // FIXME: This could be better modeled by looking at the regclasses of the operands.
+  def : InstRW<[WriteIALU, ReadIALU], (instrs COPY)>;
+}
+
+/// Worst-case predicate for writes that are keyed on LMUL only.
+/// c is set when mx is the LMUL reported by LargestLMUL<MxList>, i.e. the
+/// largest LMUL in MxList is treated as the worst case.
+class SiFive8VecIsWorstCaseMX<string mx, list<string> MxList> {
+  string LLMUL = LargestLMUL<MxList>.r;
+  bit c = !eq(LLMUL, mx);
+}
+
+/// Worst-case predicate for writes that are keyed on both LMUL and SEW.
+/// c is set only when mx is the LMUL reported by LargestLMUL<MxList> and sew
+/// is the value reported by SmallestSEW<mx, isF>.
+class SiFive8VecIsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> {
+  string LLMUL = LargestLMUL<MxList>.r;
+  int SSEW = SmallestSEW<mx, isF>.r;
+  bit c = !and(!eq(sew, SSEW), !eq(LLMUL, mx));
+}
+
+// Cycle count for a given LMUL at 1 micro-op per cycle: every fractional LMUL
+// and M1 map to a single cycle, M2/M4/M8 map to 2/4/8 cycles.
+class SiFive8VecGetLMulCycles<string mx> {
+  int c = !cond(
+    !eq(mx, "MF8") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "M1") : 1,
+    !eq(mx, "M2") : 2,
+    !eq(mx, "M4") : 4,
+    !eq(mx, "M8") : 8
+  );
+}
+
+// Computes VLMAX = SiFive8VLEN * LMUL / sew with integer arithmetic.
+// Fractional LMULs divide SiFive8VLEN by the LMUL denominator before dividing
+// by sew; every other mx multiplies by its SiFive8VecGetLMulCycles value.
+// Because the divisions truncate, the result is 0 whenever sew exceeds the
+// scaled vector length (e.g. MF8 with sew = 32).
+class SiFive8VecGetVLMAX<string mx, int sew> {
+  int val = !cond(
+    !eq(mx, "MF8") : !div(!div(SiFive8VLEN, 8), sew),
+    !eq(mx, "MF4") : !div(!div(SiFive8VLEN, 4), sew),
+    !eq(mx, "MF2") : !div(!div(SiFive8VLEN, 2), sew),
+    true: !div(!mul(SiFive8VLEN, SiFive8VecGetLMulCycles<mx>.c), sew)
+  );
+}
+
+// Latency table for strided and indexed vector loads/stores, keyed by the
+// VLMAX of (mx, sew). VLMAX of 2/4/8/16 use fixed values; anything else is
+// VLMAX - 2.
+// NOTE(review): a VLMAX of 0 or 1 also falls through to the last arm and
+// yields -2 / -1. With SiFive8VLEN = 128 that looks reachable for fractional
+// LMULs paired with wide elements (e.g. MF8 with sew = 32) -- confirm such
+// pairs are never actually scheduled.
+class SiFive8VecStridedLdStLatency<string mx, int sew> {
+  defvar VLMax = SiFive8VecGetVLMAX<mx, sew>.val;
+  int val = !cond(
+    !eq(VLMax, 2): 13,
+    !eq(VLMax, 4): 18,
+    !eq(VLMax, 8): 22,
+    !eq(VLMax, 16): 30,
+    // VL=32,64,128
+    true: !sub(VLMax, 2)
+  );
+}
+
+// Cycle count for segmented loads and stores, computed as vl * nf where vl is
+// the VLMAX of (mx, sew). Users of this class add their own fixed overhead on
+// top of the returned value.
+class SiFive8VecSegmentedLdStCycles<string mx, int sew, int nf> {
+  int c = !mul(nf, SiFive8VecGetVLMAX<mx, sew>.val);
+}
+
+// All the shared WriteRes entries
+multiclass SiFive8VecBaseWriteRes<SiFive8IntProcResources IntProcRes,
+ SiFive8VecProcResources VecProcRes> {
+ // Vector Byte Length vlenb
+ def : WriteRes<WriteRdVLENB, [IntProcRes.SYS]>;
+
+ // Configuration-Setting Instructions
+ def : WriteRes<WriteVSETVLI, [IntProcRes.VSet]>;
+ def : WriteRes<WriteVSETIVLI, [IntProcRes.VSet]>;
+ def : WriteRes<WriteVSETVL, [IntProcRes.VSet]>;
+
+ // Vector Loads and Stores
+
+ // Note that the latency of vector loads is measured by consuming the loaded
+ // value with vmv.x.s before subtracting the latency of vmv.x.s from the number.
+ foreach mx = SchedMxList in {
+ defvar LMulLat = SiFive8VecGetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFive8VecIsWorstCaseMX<mx, SchedMxList>.c;
+ // The occupancy of unit-strided load / store is equal to LMUL.
+ let Latency = 8 in {
+ let ReleaseAtCycles = [LMulLat] in {
+ defm : LMULWriteResMX<"WriteVLDE", [VecProcRes.Load], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVLDFF", [VecProcRes.Load], mx, IsWorstCase>;
+
+ defm : LMULWriteResMX<"WriteVSTE", [VecProcRes.Store], mx, IsWorstCase>;
+ }
+
+ // Mask load and store always have EMUL=1.
+ let ReleaseAtCycles = [SiFive8VecGetLMulCycles<"M1">.c] in {
+ defm : LMULWriteResMX<"WriteVLDM", [VecProcRes.Load], mx, IsWorstCase=!eq(mx, "M1")>;
+ defm : LMULWriteResMX<"WriteVSTM", [VecProcRes.Store], mx, IsWorstCase=!eq(mx, "M1")>;
+ }
+ }
+ foreach eew = [8, 16, 32, 64] in {
+ let Latency = SiFive8VecStridedLdStLatency<mx, eew>.val,
+ ReleaseAtCycles = [SiFive8VecGetVLMAX<mx, eew>.val] in {
+ defm : LMULWriteResMX<"WriteVLDS" # eew, [VecProcRes.Load], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVLDUX" # eew, [VecProcRes.Load], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVLDOX" # eew, [VecProcRes.Load], mx, IsWorstCase>;
+
+ defm : LMULWriteResMX<"WriteVSTS" # eew, [VecProcRes.Store], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVSTUX" # eew, [VecProcRes.Store], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVSTOX" # eew, [VecProcRes.Store], mx, IsWorstCase>;
+ }
+ }
+ }
+
+ foreach mx = SchedMxList in {
+ foreach nf=2-8 in {
+ foreach eew = [8, 16, 32, 64] in {
+ defvar LMulLat = SiFive8VecSegmentedLdStCycles<mx, eew, nf>.c;
+ defvar IsWorstCase = SiFive8VecIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = !add(12, LMulLat), ReleaseAtCycles = [!add(12, LMulLat)] in {
+ defm : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [VecProcRes.Load], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [VecProcRes.Load], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [VecProcRes.Load], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [VecProcRes.Load], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [VecProcRes.Load], mx, IsWorstCase>;
+ }
+ let Latency = !add(1, LMulLat), ReleaseAtCycles = [!add(12, LMulLat)] in {
+ defm : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [VecProcRes.Store], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [VecProcRes.Store], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [VecProcRes.Store], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [VecProcRes.Store], mx, IsWorstCase>;
+ }
+ }
+ }
+ }
+
+ // Whole register move/load/store
+ foreach LMul = [1, 2, 4, 8] in {
+ let Latency = 8, ReleaseAtCycles = [LMul] in {
+ def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [VecProcRes.Load]>;
+ def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [VecProcRes.Store]>;
+ }
+ let Latency = 2, ReleaseAtCycles = [LMul] in {
+ def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [VecProcRes.Arith]>;
+ }
+ }
+
+ // Worst case needs 51/45/42/72 * lmul cycles for i8/16/32/64.
+ foreach mx = SchedMxList in {
+ foreach sew = SchedSEWSet<mx>.val in {
+ defvar LMulLat = SiFive8VecGetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFive8VecIsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+ defvar DivMicroOpLat =
+ !cond(!eq(sew, 8): 51, !eq(sew, 16): 45, !eq(sew, 32): 42,
+ /* SEW=64 */ true: 72);
+ defvar DivLatency = !mul(DivMicroOpLat, LMulLat);
+ let Latency = DivLatency, ReleaseAtCycles = [LMulLat, DivLatency] in {
+ defm : LMULSEWWriteResMXSEW<"WriteVIDivV", VecProcRes.Div, mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVIDivX", VecProcRes.Div, mx, sew, IsWorstCase>;
+ }
+ }
+ }
+
+ // Narrowing Shift and Clips
+ foreach mx = SchedMxListW in {
+ defvar LMulLat = SiFive8VecGetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFive8VecIsWorstCaseMX<mx, SchedMxListW>.c;
+ let Latency = 2, ReleaseAtCycles = [LMulLat] in {
+ defm : LMULWriteResMX<"WriteVNShiftV", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVNShiftX", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVNShiftI", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVNClipV", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVNClipX", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVNClipI", [VecProcRes.Arith], mx, IsWorstCase>;
+ }
+ }
+
+ // 12. Vector Fixed-Point Arithmetic Instructions
+ foreach mx = SchedMxList in {
+ defvar LMulLat = SiFive8VecGetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFive8VecIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 6, ReleaseAtCycles = [LMulLat] in {
+ defm : LMULWriteResMX<"WriteVSALUV", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVSALUX", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVSALUI", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVAALUV", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVAALUX", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVSMulV", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVSMulX", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVSShiftV", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVSShiftX", [VecProcRes.Arith], mx, IsWorstCase>;
+ defm : LMULWriteResMX<"WriteVSShiftI", [VecProcRes.Arith], mx, IsWorstCase>;
+ }
+ }
+
+ // 13. Vector Floating-Point Instructions
+ foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, isF=1>.val in {
+ defvar LMulLat = SiFive8VecGetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFive8VecIsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
+ let Latency = 6, ReleaseAtCycles = [LMulLat] in {
+ defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VecProcRes.Arith], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VecProcRes.Arith], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VecProcRes.Arith], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VecProcRes.Arith], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VecProcRes.Arith], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VecProcRes.Arith], mx, sew, IsWorstCase>;
+ }
+ }
+ }
+
+ // Widening
+ foreach mx = SchedMxListW in {
+ foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
+ defvar LMulLat = SiFive8VecGetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFive8VecIsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
+ let Latency = 3, ReleaseAtCycles = [LMulLat] in
+ defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VecProcRes.Arith], mx, sew, IsWorstCase>;
+ }
+ }
+ foreach mx = SchedMxListFW in {
+ defvar LMulLat = SiFive8VecGetLMulCycles<mx>.c...
[truncated]
|
🐧 Linux x64 Test Results
Failed Tests (click on a test name to see its output): LLVM — LLVM.tools/llvm-symbolizer/wasm-basic.s (Likely Already Failing). This test is already failing at the base commit. If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
🪟 Windows x64 Test Results
Failed Tests (click on a test name to see its output): LLVM — LLVM.tools/llvm-symbolizer/wasm-basic.s. If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
Stacks on top of #199796
The vsetvli instruction (and friends) should use the first integer pipe (i.e. SiFiveP400IEXQ0)