
package spinal.lib.bus.tilelink.coherent
import spinal.core._
import spinal.lib._
import spinal.lib.bus.misc.SizeMapping
import spinal.lib.bus.tilelink._
import spinal.lib.fsm.{State, StateDelay, StateMachine}
import spinal.lib.misc.Plru
import spinal.lib.pipeline._
import scala.collection.mutable.ArrayBuffer
case class SelfFLush(from : BigInt, upTo : BigInt, period : BigInt)
case class CacheParam(var unp : NodeParameters,
var downPendingMax : Int,
var cacheWays: Int,
var cacheBytes: Int,
var blockSize : Int,
var cnp : NodeParameters = null,
var cacheBanks : Int = 1,
var probeCount : Int = 8,
var aBufferCount: Int = 4,
var ctrlLoopbackDepth : Int = 4,
var generalSlotCount : Int = 8,
var generalSlotCountUpCOnly : Int = 2,
var victimBufferLines : Int = 2,
var upCBufferDepth : Int = 8,
var readProcessAt : Int = 2,
var coherentRegion : UInt => Bool,
var selfFlush : SelfFLush = null,
var allocateOnMiss : (Cache.CtrlOpcode.C, UInt, UInt, UInt, Bits) => Bool = null // opcode, source, address, size, upParam
) {
assert(isPow2(cacheBytes))
def withSelfFlush = selfFlush != null
def withFlush = withCtrl || withSelfFlush
def withCtrl = cnp != null
def lockSets = cacheSets //TODO min trackedSets !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
def cacheLines = cacheBytes / blockSize
def cacheSets = cacheLines / cacheWays
def addressWidth = unp.m.addressWidth
def dataWidth = unp.m.dataWidth
def dataBytes = dataWidth/8
def tagRange = addressWidth-1 downto log2Up(cacheBytes/cacheWays)
def lineRange = tagRange.low-1 downto log2Up(lineSize)
def wordRange = log2Up(lineSize)-1 downto log2Up(dataBytes)
def wordsPerLine = lineSize/dataBytes
def refillRange = tagRange.high downto lineRange.low
def blockRange = addressWidth-1 downto log2Up(lineSize)
def lineSize = blockSize
def setsRange = lineRange
def cacheAddressWidth = log2Up(cacheBytes/dataBytes)
def addressCheckRange = setsRange.high downto log2Up(lineSize) //For now, this also avoids way clashes (gsHits)
def addressCheckWidth = addressCheckRange.size
// def lockSetsRange = log2Up(lineSize*lockSets)-1 downto log2Up(lineSize)
}
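// A worked example of the derived geometry (a sketch, assuming the basicConfig() values used by
// DirectoryGen at the bottom of this file: cacheBytes = 64 KiB, cacheWays = 8, blockSize = 64,
// dataWidth = 64, addressWidth = 32):
//   dataBytes         = 8, wordsPerLine = 64/8 = 8
//   cacheLines        = 65536/64 = 1024, cacheSets = 1024/8 = 128
//   tagRange          = 31 downto 13  (tag stored and compared per way)
//   lineRange         = 12 downto 6   (7 bits, selects one of the 128 sets)
//   wordRange         = 5 downto 3    (3 bits, selects one of the 8 words of a line)
//   cacheAddressWidth = log2Up(65536/8) = 13 (data ram address, the way id is concatenated on top)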
object Cache extends AreaObject{
val CtrlOpcode = new SpinalEnum {
val ACQUIRE_BLOCK, ACQUIRE_PERM, RELEASE, RELEASE_DATA, PUT_PARTIAL_DATA, PUT_FULL_DATA, GET, EVICT, FLUSH = newElement()
}
val ToUpDOpcode = new SpinalEnum {
val NONE, ACCESS_ACK, ACCESS_ACK_DATA, GRANT, GRANT_DATA, RELEASE_ACK = newElement()
}
def downM2s(name : Nameable,
addressWidth : Int,
dataWidth : Int,
blockSize : Int,
generalSlotCount : Int) = M2sParameters(
addressWidth = addressWidth,
dataWidth = dataWidth,
masters = List(M2sAgent(
name = name,
mapping = List(M2sSource(
id = SizeMapping(0, 2 << log2Up(generalSlotCount)),
emits = M2sTransfers(
putPartial = SizeRange.upTo(blockSize),
putFull = SizeRange.upTo(blockSize),
get = SizeRange.upTo(blockSize)
)
)
))
)
)
def upS2m(name : Nameable,
blockSize : Int,
generalSlotCount : Int) = S2mParameters(List(
S2mAgent(
name = name,
emits = S2mTransfers(
probe = SizeRange(blockSize)
),
sinkId = SizeMapping(0, generalSlotCount)
)
))
}
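// Note on the down-side source id encoding (inferred from readDown/writeBackend/fromDownD below):
// the down A source is built as evictFlag @@ gsId, which is why downM2s reserves
// 2 << log2Up(generalSlotCount) source ids. fromDownD then uses source.msb to recognise victim
// write-back completions and the low bits to recover the general slot.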
class Cache(val p : CacheParam) extends Component {
import p._
import Cache.CtrlOpcode
assert(generalSlotCountUpCOnly < generalSlotCount)
val ubp = p.unp.toBusParameter()
val dbp = NodeParameters(
m = Cache.downM2s(
name = this,
addressWidth = addressWidth,
dataWidth = dataWidth,
blockSize = blockSize,
generalSlotCount = generalSlotCount
),
s = S2mParameters.none()
).toBusParameter()
val io = new Bundle {
val ctrl = withCtrl generate slave(Bus(p.cnp))
val up = slave(Bus(ubp))
val down = master(Bus(dbp))
val ordering = new Bundle {
val ctrlProcess, writeBackend = master(Flow(OrderingCmd(up.p.sizeBytes)))
def all = List(ctrlProcess, writeBackend)
}
}
this.addTags(io.ordering.all.map(OrderingTag(_)))
val coherentMasters = unp.m.masters.filter(_.emits.withBCE)
val coherentMasterCount = coherentMasters.size
val coherentMasterToSource = coherentMasters.map(_.bSourceId)
val SET_ID = Stageable(UInt(log2Up(lockSets) bits))
val LOCK_CTX = Stageable(Bool())
val ADDRESS = Stageable(ubp.address)
val GS_ID = Stageable(UInt(log2Up(generalSlotCount) bits))
val emptyId = 0
def isBlockEmpty(blockId : UInt) = blockId === emptyId
def isBlockWithData(blockId : UInt) = blockId.msb
val initializer = new Area {
val initCycles = Math.max(p.lockSets, p.cacheWays)
val counter = Reg(UInt(log2Up(initCycles) + 1 bits)) init (0)
val done = counter.msb
when(!done) {
counter := counter + 1
}
}
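// The initializer counter sweeps the tag and PLRU rams after reset (see the when(!initializer.done)
// block at the bottom of the component, which force-writes cleared entries while up A is halted).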
val events = new Area{
val getPut = new Area{
val hit, miss = False
}
val acquire = new Area{
val hit, miss = False
}
}
case class Tags(val withData : Boolean) extends Bundle {
val loaded = Bool()
val tag = UInt(tagRange.size bits)
val dirty = withData generate Bool()
val trunk = Bool()
val owners = Bits(coherentMasterCount bits)
}
class LineCtrl(bytes : Int, ways: Int, withData : Boolean) extends Area {
val sets = bytes / blockSize / ways
val plru = new Area{
val ram = Mem.fill(sets)(Plru.State(cacheWays))
val read = ram.readSyncPort
val write = ram.writePort
}
val tags = new Area {
val ram = Mem.fill(sets)(Vec.fill(ways)(Tags(withData)))
val read = ram.readSyncPort
val writeRaw = ram.writePortWithMask(ways)
val write = new Area{
val valid = Bool()
val address = ram.addressType()
val mask = Bits(ways bits)
val data = Tags(withData)
writeRaw.valid := valid
writeRaw.address := address
writeRaw.mask := mask
writeRaw.data.foreach(_:= data)
assert(!(valid && data.trunk && data.owners === 0))
}
}
val data = withData generate new Area{
val upWrite, downWrite = Stream(MemWriteCmd(Bits(p.dataWidth bits), cacheAddressWidth, p.dataBytes))
val upWriteDemux = StreamDemux(upWrite, upWrite.address.resize(log2Up(cacheBanks)), cacheBanks)
val downWriteDemux = StreamDemux(downWrite, downWrite.address.resize(log2Up(cacheBanks)), cacheBanks)
val read = Stream(UInt(cacheAddressWidth bits))
val banks = for(i <- 0 until cacheBanks) yield new Area{
val ram = Mem.fill(cacheBytes/p.dataBytes/cacheBanks)(Bits(p.dataWidth bits))
val readed = Bits(p.dataWidth bits)
val writeArbiter = StreamArbiterFactory().noLock.lowerFirst.buildOn(downWriteDemux(i), upWriteDemux(i))
val write = writeArbiter.io.output.combStage()
}
val fpgaImpl = new Area{
// Use simple dual port memories
read.ready := True
val b = for((bank, i) <- banks.zipWithIndex) yield new Area{
import bank._
write.ready := True
ram.write(write.address >> log2Up(cacheBanks), write.data, write.valid, write.mask)
val readSel = read.valid && read.payload.resize(log2Up(cacheBanks)) === i
readed := ram.readSync(read.payload >> log2Up(cacheBanks), readSel)
}
}
}
}
val cache = new LineCtrl(cacheBytes, cacheWays, true)
class CtrlCmd() extends Bundle {
val opcode = CtrlOpcode()
val args = Bits(1 bits)
val address = ubp.address()
val size = ubp.size()
val source = ubp.source()
val upParam = Bits(3 bits)
val bufferAId = BUFFER_A_ID()
val probed = Bool()
val probedUnique = Bool()
val gsId = GS_ID() //Only valid when probed
val debugId = DebugId()
val withDataUpC = Bool()
val evictWay = UInt(log2Up(cacheWays) bits)
def toTrunk = args(0)
def toNone = args(0)
}
class ProberCmd() extends CtrlCmd {
val mask = Bits(coherentMasterCount bits)
val probeToN = Bool()
val evictClean = Bool()
}
class ReadDownCmd() extends Bundle {
val gsId = GS_ID()
val address = ubp.address()
val size = ubp.size()
}
class WriteBackendCmd() extends Bundle {
val fromUpA = Bool()
val fromUpC = Bool()
val toDownA = Bool() //else to cache
def toCache = !toDownA
val toUpD = Cache.ToUpDOpcode()
val toT = Bool()
val source = ubp.source()
val gsId = GS_ID()
val partialUpA = Bool()
val address = ubp.address()
val size = ubp.size()
val wayId = UInt(log2Up(cacheWays) bits)
val bufferAId = BUFFER_A_ID()
val evict = Bool()
val debugId = DebugId()
}
class ReadBackendCmd() extends Bundle {
val toUpD = Bool() // else to victim
def toVictim = !toUpD
val toWriteBackend = Bool()
val gsId = GS_ID()
val address = ubp.address()
val size = ubp.size()
val wayId = UInt(log2Up(cacheWays) bits)
// val victimId = VICTIME_ID()
val upD = new Bundle {
val opcode = Opcode.D()
val param = Bits(3 bits)
val source = ubp.source()
}
}
val CTRL_CMD = Stageable(new CtrlCmd())
val BUFFER_A_ID = Stageable(UInt((if(ubp.withDataA) log2Up(aBufferCount).toInt else 0) bits))
class Slot extends Area{
val fire = False
val valid = RegInit(False) clearWhen(fire)
}
class SlotPool[T <: Slot](slotsCount : Int)(gen : => T) extends Area{
val slots = for(i <- 0 until slotsCount) yield gen
val allocate = new Area{
val full = slots.map(_.valid).andR
val oh = B(OHMasking.firstV2(Vec(slots.map(!_.valid))))
val id = OHToUInt(oh)
}
}
// release_data =>
// get => read, [lock], [victim]
// put => write, [lock], [victim]
// acquire_block => [lock], [victim]
class GeneralSlot extends Slot{
val address = Reg(UInt(addressCheckWidth bits))
// val way = Reg(UInt(log2Up(cacheWays) bits))
val pending = new Area{
val victim, primary, acquire, victimRead, victimWrite = Reg(Bool())
fire setWhen(List(victim, acquire, primary, victimWrite).norR)
}
}
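// Rough lifecycle of the GeneralSlot pending flags (a summary inferred from their clear sites below):
// - primary     : cleared once the up D response for the request has been issued
// - acquire     : cleared by the up E GRANT_ACK (see fromUpE)
// - victim      : cleared when down D acknowledges the victim write-back, or when the probe confirms a clean eviction
// - victimRead  : cleared once the read backend has started reading the victim line out of the cache
// - victimWrite : cleared once the victim data has been pushed into the victim buffer
// The slot is recycled (fire) once victim, acquire, primary and victimWrite are all clear.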
case class CtxDownD() extends Bundle {
val toUpD, toCache = Bool()
val toProbe, mergeBufferA = Bool()
// val probeId = UInt(log2Up(probeCount) bits)
val bufferAId = BUFFER_A_ID()
val wordOffset = UInt(wordRange.size bits)
val sourceId = io.up.p.source()
val setId = UInt(setsRange.size bits)
val wayId = UInt(log2Up(cacheWays) bits)
val size = ubp.size()
val acquire, release = Bool()
val toT = Bool()
}
val gs = new SlotPool(generalSlotCount)(new GeneralSlot){
val ctxDownD = new Area{
val ram = Mem.fill(generalSlotCount)(CtxDownD())
val write = ram.writePort()
}
val fullUpA = slots.dropRight(p.generalSlotCountUpCOnly).map(_.valid).andR
}
val flush = withFlush generate new Area {
val reserved = RegInit(False)
val address, upTo = Reg(ubp.address())
val start = False
val cmd = Stream(new CtrlCmd())
val fsm = new StateMachine {
val IDLE, CMD, INFLIGHT, GS = new State()
setEntry(IDLE)
val inflight = CounterUpDown(generalSlotCount + ctrlLoopbackDepth + 4)
val gsMask = Reg(Bits(generalSlotCount bits))
IDLE.whenIsActive(when(start)(goto(CMD)))
CMD whenIsActive {
when(cmd.fire) {
address := address + lineSize
when(address(blockRange) === upTo(blockRange)) {
goto(INFLIGHT)
}
}
}
gsMask := gsMask & gs.slots.map(_.valid).asBits
INFLIGHT whenIsActive {
when(inflight === 0) {
gsMask.setAll()
goto(GS)
}
}
GS whenIsActive {
when(gsMask === 0) {
reserved := False
goto(IDLE)
}
}
inflight.incrementIt setWhen (cmd.fire)
cmd.valid := isActive(CMD)
cmd.opcode := CtrlOpcode.FLUSH // CtrlOpcode()
cmd.args := 0 // Bits(1 bits)
cmd.address := address(blockRange) << blockRange.low // ubp.address()
cmd.size := log2Up(p.lineSize) // ubp.size()
cmd.source := 0 // ubp.source()
cmd.bufferAId := 0 // BUFFER_A_ID()
cmd.probed := False // Bool()
cmd.probedUnique := False // Bool()
cmd.gsId := 0 // GS_ID() //Only valid when probed
cmd.debugId := 0 // DebugId()
cmd.withDataUpC := False // Bool()
cmd.evictWay := 0 // UInt(log2Up(cacheWays) bits)
cmd.upParam := 0
}
}
val selfFlusher = withSelfFlush generate new StateMachine {
val CMD = new State()
val WAIT = new StateDelay(selfFlush.period){whenCompleted(goto(CMD))}
setEntry(WAIT)
CMD.whenIsActive{
when(!flush.reserved){
flush.reserved := True
flush.start := True
flush.address := selfFlush.from
flush.upTo := selfFlush.upTo
goto(WAIT)
}
}
}
val ctrlLogic = withCtrl generate new Area {
val mapper = new SlaveFactory(io.ctrl, allowBurst = true)
mapper.setOnSet(flush.start, 0x08, 1)
flush.reserved setWhen (!flush.reserved && mapper.isReading(0x08))
mapper.read(flush.reserved || withSelfFlush.mux(selfFlusher.isActive(selfFlusher.CMD), False), 0x08)
mapper.writeMultiWord(flush.address, 0x10)
mapper.writeMultiWord(flush.upTo, 0x18)
}
val fromUpA = new Area{
val halted = io.up.a.haltWhen(!initializer.done)
val buffer = ubp.withDataA generate new ChannelDataBuffer(
entries = aBufferCount,
blockSize = blockSize,
dataBytes = ubp.dataBytes
) {
val read = ram.readSyncPort()
val pusher = push(halted)
}
val conv = if (ubp.withDataA) buffer.pusher.down else halted
val toCtrl = conv.swapPayload(new CtrlCmd())
if (ubp.withDataA) toCtrl.bufferAId := buffer.pusher.bufferId
toCtrl.opcode := conv.opcode.muxDc(
Opcode.A.GET -> CtrlOpcode.GET(),
Opcode.A.PUT_PARTIAL_DATA -> CtrlOpcode.PUT_PARTIAL_DATA(),
Opcode.A.PUT_FULL_DATA -> CtrlOpcode.PUT_FULL_DATA(),
Opcode.A.ACQUIRE_PERM -> CtrlOpcode.ACQUIRE_PERM(),
Opcode.A.ACQUIRE_BLOCK -> CtrlOpcode.ACQUIRE_BLOCK()
)
toCtrl.toTrunk := conv.param =/= Param.Grow.NtoB
toCtrl.address := conv.address
toCtrl.size := conv.size
toCtrl.source := conv.source
toCtrl.probed := False
toCtrl.gsId.assignDontCare()
toCtrl.debugId := conv.debugId
toCtrl.withDataUpC := False
toCtrl.evictWay.assignDontCare()
toCtrl.probedUnique := False
toCtrl.upParam := conv.param
}
val victimBuffer = new Area{
val ram = Mem.fill(generalSlotCount*wordsPerLine)(io.up.p.data())
val write = ram.writePort()
val read = ram.readSyncPort()
}
val upCSplit = new Area{
val (cmdFork, dataFork) = StreamFork2(io.up.c)
val cmd = cmdFork.translateWith(io.up.c.asNoData()).takeWhen(io.up.c.isFirst())
val data = dataFork.takeWhen(io.up.c.withBeats).translateWith(dataFork.data)
val dataPop = data.queue(upCBufferDepth).m2sPipe()
}
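// up C is forked into a header stream (one token per burst, taken on the first beat) and a data
// stream buffered in a upCBufferDepth deep queue; the write backend later pops dataPop beat by beat
// when merging ProbeAckData/ReleaseData into the cache or down A.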
class ProberSlot extends Slot{
val address = Reg(UInt(blockRange.size bits)) //We really need the full address range, as we need to catch RELEASE_DATA while probing, to update the dirtiness of the data
val pending = Reg(UInt(log2Up(coherentMasterCount + 1) bits))
val probeAckDataCompleted = Reg(Bool())
val unique = Reg(Bool())
val done = valid && (pending & ~U(probeAckDataCompleted, widthOf(pending) bits)) === 0
val evictClean = Reg(Bool())
}
//Currently we ignore the cache owner tracking for PROBE_ACK_DATA TtoN (it will behave like a TtoB)
val prober = new SlotPool(probeCount)(new ProberSlot){
val ctx = new Area{
val ram = Mem.fill(probeCount)(new CtrlCmd())
val write = ram.writePort()
}
val cmd = Stream(new ProberCmd())
val toSlot = new Area {
val full = slots.map(_.valid).andR
ctx.write.valid := False
ctx.write.address := allocate.id
ctx.write.data.assignSomeByName(cmd.payload)
val pending = CountOne(cmd.mask)
when(cmd.fire) {
slots.onMask(allocate.oh) { s =>
s.valid := True
s.address := cmd.address(blockRange)
s.pending := pending
s.unique := True
s.probeAckDataCompleted := False
s.evictClean := cmd.evictClean
}
ctx.write.valid := True
}
val halted = cmd.haltWhen(full)
}
val toUpB = new Area{
val bus = io.up.b
val sendUpB = toSlot.halted.pipelined(m2s = true, s2m = true)
val fired = Reg(Bits(coherentMasterCount bits)) init (0)
val requests = sendUpB.mask & ~fired
val masterOh = OHMasking.firstV2(requests)
bus.valid := sendUpB.valid && requests.orR
bus.opcode := (sendUpB.opcode === CtrlOpcode.ACQUIRE_PERM).mux(Opcode.B.PROBE_PERM, Opcode.B.PROBE_BLOCK)
bus.param := sendUpB.probeToN ? B(Param.Cap.toN, 3 bits) | B(Param.Cap.toB, 3 bits)
bus.source := OhMux(masterOh, coherentMasterToSource.map(id => U(id, ubp.sourceWidth bits)).toList)
bus.address := sendUpB.address(blockRange) @@ U(0, blockRange.low bits)
bus.size := log2Up(p.blockSize)
sendUpB.ready := requests === 0
when(bus.fire) {
fired.asBools.onMask(masterOh)(_ := True)
}
when(sendUpB.ready) {
fired := 0
}
}
val upC = new Area {
val input = upCSplit.cmd
val isReleaseData = input.opcode === Opcode.C.RELEASE_DATA
val isProbeNoData = input.opcode === Opcode.C.PROBE_ACK
val isProbe = Opcode.C.isProbe(input.opcode)
val hitOh = slots.map(s => s.valid && s.address(0, addressCheckRange.size bits) === input.address(addressCheckRange))
val hitId = OHToUInt(hitOh)
val pending = slots.map(_.pending).read(hitId)
val pendingNext = pending - 1
val masterOh = coherentMasterToSource.map(input.source === _).asBits
val masterId = OHToUInt(masterOh)
val keptCopy = Param.reportPruneKeepCopy(input.param)
when(input.fire && isProbe) {
slots.onMask(hitOh) { s =>
s.unique.clearWhen(keptCopy)
when(isProbeNoData) { s.pending := pendingNext }
}
}
val filtred = input.throwWhen(upCSplit.cmd.opcode === Opcode.C.PROBE_ACK)
class UpCCmd extends Bundle{
val hitId = UInt(log2Up(probeCount) bits)
val opcode = Opcode.C()
val address = ubp.address()
val source = ubp.source()
val toNone = Bool()
}
val down = filtred.swapPayload(new UpCCmd)
down.hitId := hitId
down.opcode := input.opcode
down.address := input.address
down.source := input.source
down.toNone := !Param.reportPruneKeepCopy(input.param)
}
val schedule = new Area {
val fromUpC = upC.down.halfPipe()
val fromProbe = Stream(NoData())
val merged = Stream(new CtrlCmd())
val hits = slots.map(_.done).asBits
val hitOh = CombInit(OHMasking.roundRobinNext(hits, fromProbe.fire))
val probeId = OHToUInt(hitOh)
fromProbe.valid := hits.orR
when(fromProbe.fire || fromUpC.fire && fromUpC.opcode === Opcode.C.PROBE_ACK_DATA) {
slots.onSel(probeId) { s =>
s.fire := True
}
}
//fromUpC does not come for PROBE_ACK
fromUpC.ready := merged.ready
fromProbe.ready := merged.ready && !fromUpC.valid
merged.valid := fromUpC.valid || fromProbe.valid
merged.payload := ctx.ram.readAsync(probeId)
merged.probedUnique.removeAssignments() := slots.reader(hitOh)(_.unique)
when(fromUpC.valid) {
probeId := fromUpC.hitId
merged.probed clearWhen(Opcode.C.isRelease(fromUpC.opcode))
merged.withDataUpC setWhen(Opcode.C.withData(fromUpC.opcode))
merged.probedUnique := fromUpC.toNone
when(Opcode.C.isRelease(fromUpC.opcode)){
merged.address := fromUpC.address
merged.source := fromUpC.source
merged.toNone := fromUpC.toNone
merged.opcode := fromUpC.opcode.muxDc(
Opcode.C.RELEASE -> CtrlOpcode.RELEASE(),
Opcode.C.RELEASE_DATA -> CtrlOpcode.RELEASE_DATA()
)
}
}
val isEvict = merged.opcode === CtrlOpcode.EVICT
val isEvictClean = isEvict && !fromUpC.valid && (hitOh & slots.map(_.evictClean).asBits).orR
val toCtrl = merged.throwWhen(isEvictClean)
when(merged.valid && isEvictClean){
gs.slots.onSel(merged.gsId){s =>
s.pending.victim := False
}
}
}
}
//TODO check older way is allocated
val ctrl = new Pipeline{
val stages = newChained(3, Connection.M2S())
val inserterStage = stages(0)
val addressStage = stages(0)
val dataStage = stages(1)
val tagStage = stages(1)
val prepStage = stages(1)
val processStage = stages(2)
import CtrlOpcode._
val loopback = new Area{
val occupancy = new CounterUpDown(ctrlLoopbackDepth, handleOverflow = false)
val allowUpA = !occupancy.mayOverflow
val fifo = StreamFifo(new CtrlCmd, ctrlLoopbackDepth, forFMax = true)
}
val inserter = new Area {
import inserterStage._
def hazardHalt(that : Stream[CtrlCmd]) = {
val hits = stages.tail.map(s => s.valid && s(CTRL_CMD).address(setsRange) === that.address(setsRange))
that.haltWhen(hits.orR)
}
val upAHold = False
val cmds = ArrayBuffer[Stream[CtrlCmd]]()
cmds += prober.schedule.toCtrl.pipelined(m2s = true, s2m = true)
cmds += loopback.fifo.io.pop.halfPipe()
cmds += fromUpA.toCtrl.continueWhen(loopback.allowUpA && !upAHold)
val fromFlush = withFlush generate new Area{
val regulator = Reg(UInt(2 bits)) init(0)
when(flush.cmd.valid){
when(fromUpA.toCtrl.fire) {
regulator := regulator + 1
}
when(flush.cmd.ready) {
regulator := 0
}
upAHold setWhen(regulator.andR)
}
cmds += flush.cmd.continueWhen(loopback.allowUpA)
}
val arbiter = StreamArbiterFactory().lowerFirst.noLock.buildOn(cmds)
when(fromUpA.toCtrl.fire || withFlush.mux(flush.cmd.fire, False)) {
loopback.occupancy.increment()
}
driveFrom(hazardHalt(arbiter.io.output))
inserterStage(CTRL_CMD) := arbiter.io.output
val SOURCE_OH = insert(B(coherentMasters.map(_.sourceHit(CTRL_CMD.source))))
}
cache.tags.read.cmd.valid := addressStage.isFireing
cache.tags.read.cmd.payload := addressStage(CTRL_CMD).address(lineRange)
val CACHE_TAGS = dataStage.insert(cache.tags.read.rsp)
cache.plru.read.cmd.valid := addressStage.isFireing
cache.plru.read.cmd.payload := addressStage(CTRL_CMD).address(setsRange)
val CACHE_PLRU = dataStage.insert(cache.plru.read.rsp)
val tags = new Area{
import tagStage._
val read = tagStage(CACHE_TAGS)
val CACHE_HITS = insert(read.map(t => t.loaded && t.tag === CTRL_CMD.address(tagRange)).asBits)
val SOURCE_HITS = insert(read.map(t => (t.owners & inserter.SOURCE_OH).orR).asBits)
}
val preCtrl = new Area{
import prepStage._
val PROBE_REGION = insert(p.coherentRegion(CTRL_CMD.address))
val ALLOCATE_ON_MISS = insert(p.allocateOnMiss(CTRL_CMD.opcode, CTRL_CMD.source, CTRL_CMD.address, CTRL_CMD.size, CTRL_CMD.upParam)) //TODO
val FROM_A = insert(List(GET(), PUT_FULL_DATA(), PUT_PARTIAL_DATA(), ACQUIRE_BLOCK(), ACQUIRE_PERM(), FLUSH()).sContains(CTRL_CMD.opcode))
val FROM_C_RELEASE = insert(List(RELEASE(), RELEASE_DATA()).sContains(CTRL_CMD.opcode))
val GET_PUT = insert(List(GET(), PUT_FULL_DATA(), PUT_PARTIAL_DATA()).sContains(CTRL_CMD.opcode))
val ACQUIRE = insert(List(ACQUIRE_PERM(), ACQUIRE_BLOCK()).sContains(CTRL_CMD.opcode))
val IS_RELEASE = insert(List(RELEASE(), RELEASE_DATA()).sContains(CTRL_CMD.opcode))
val IS_EVICT = insert(List(EVICT()).sContains(CTRL_CMD.opcode))
val IS_FLUSH = insert(List(FLUSH()).sContains(CTRL_CMD.opcode))
val IS_GET = insert(List(GET()).sContains(CTRL_CMD.opcode))
val IS_PUT = insert(List(PUT_FULL_DATA(), PUT_PARTIAL_DATA()).sContains(CTRL_CMD.opcode))
val IS_PUT_FULL_BLOCK = insert(CTRL_CMD.opcode === CtrlOpcode.PUT_FULL_DATA && CTRL_CMD.size === log2Up(blockSize))
val WRITE_DATA = insert(List(PUT_PARTIAL_DATA(), PUT_FULL_DATA(), RELEASE_DATA()).sContains(CTRL_CMD.opcode))
val GS_NEED = insert(List(ACQUIRE_BLOCK, ACQUIRE_PERM, RELEASE_DATA, PUT_PARTIAL_DATA, PUT_FULL_DATA, GET, FLUSH).map(_.craft()).sContains(CTRL_CMD.opcode))
val GS_HITS = insert(gs.slots.map(s => s.valid && CTRL_CMD.address(addressCheckRange) === s.address).asBits)
val GS_HIT = insert(GS_HITS.orR)
val GS_OH = insert(UIntToOh(CTRL_CMD.gsId, generalSlotCount))
//For as long as the cache is inclusive
when(ACQUIRE){
ALLOCATE_ON_MISS := True
}
}
val process = new Area{
import processStage._
val redoUpA = False
assert(!(isValid && redoUpA && !preCtrl.FROM_A))
throwIt(redoUpA)
redoUpA.setWhen(preCtrl.FROM_A && !CTRL_CMD.probed && preCtrl.GS_HIT) //TODO could be less pessimistic
val stallIt = False
assert(!(isValid && stallIt && preCtrl.FROM_A))
haltIt(stallIt)
val firstCycle = RegNext(!isStuck || stallIt) init (True)
val gsHitVictim = CTRL_CMD.opcode === RELEASE_DATA && (preCtrl.GS_HITS & B(gs.slots.map(_.pending.victimWrite))).orR
stallIt setWhen(gsHitVictim)
val askAllocate = False //Will handle victim
val askProbe = False
val askReadDown = False
val askReadBackend = False
val askOrdering = False
val askWriteBackend = False
val askGs = preCtrl.GS_NEED && !CTRL_CMD.probed
val askUpD = False
when(askGs){
when(preCtrl.FROM_A && gs.fullUpA){
redoUpA := True
}
when(!preCtrl.FROM_A && gs.allocate.full){
stallIt := True
}
}
when(isFireing && preCtrl.FROM_A && askGs && !redoUpA){
loopback.occupancy.decrement()
}
// val toProbe = forkStream(askProbe && !redoUpA).swapPayload(new ProbeCmd())
val toReadDown = forkStream(askReadDown && !redoUpA).swapPayload(new ReadDownCmd)
val toReadBackend = forkStream(askReadBackend && !redoUpA).swapPayload(new ReadBackendCmd)
val toWriteBackend = forkStream(askWriteBackend && !redoUpA && !stallIt).swapPayload(new WriteBackendCmd)
val toUpD = forkStream(askUpD && !redoUpA).swapPayload(io.up.d.payloadType)
val toOrdering = forkFlow(askOrdering && !redoUpA).swapPayload(io.ordering.ctrlProcess.payloadType)
val CACHE_HIT = insert(tags.CACHE_HITS.orR)
val CACHE_HIT_WAY_ID = insert(OHToUInt(tags.CACHE_HITS))
val SOURCE_HIT = insert((tags.CACHE_HITS & tags.SOURCE_HITS).orR)
val CACHE_LINE = insert(OhMux.or(tags.CACHE_HITS, CACHE_TAGS))
val OTHERS = insert(CACHE_LINE.owners & ~inserter.SOURCE_OH)
val SELF = insert((CACHE_LINE.owners & inserter.SOURCE_OH).orR)
val OTHER = insert(OTHERS.orR)
val ANY = insert(CACHE_LINE.owners.orR)
val backendWayId = CACHE_HIT_WAY_ID()
val gotGs = RegInit(False)
val gsOhLocked = RegNextWhen(gs.allocate.oh, !gotGs)
val gsOh = gotGs.mux(gsOhLocked, gs.allocate.oh)
when(CTRL_CMD.probed) {
gsOh := preCtrl.GS_OH
}
val gsId = OHToUInt(gsOh)
val gsAddress = CombInit(CTRL_CMD.address(addressCheckRange))
val gsRefill = False
val gsWrite = preCtrl.WRITE_DATA || CTRL_CMD.withDataUpC
val gsWay = CombInit(backendWayId)
val gsPendingVictim = False
val gsPendingVictimReadWrite = False
val gsPendingPrimary = True
//TODO don't forget to ensure that a victim gets out of the cache before downD/upA erases it
toUpD.opcode.assignDontCare()
toUpD.source := CTRL_CMD.source
toUpD.sink := gsId
toUpD.size := log2Up(blockSize)
toUpD.param := 0
toUpD.denied := False
toUpD.corrupt := False
toUpD.data.assignDontCare()
val clearPrimary = False
val oldClearPrimary = RegNext(clearPrimary && isFireing && !isRemoved) init(False)
val oldGsId = RegNext(gsId)
when(oldClearPrimary) {
gs.slots.onSel(oldGsId)(_.pending.primary := False)
}
prober.cmd.valid := isValid && askProbe && !redoUpA && firstCycle
prober.cmd.payload.assignSomeByName(CTRL_CMD)
prober.cmd.mask.assignDontCare()
prober.cmd.probeToN.assignDontCare()
prober.cmd.probed.removeAssignments() := True
prober.cmd.gsId.removeAssignments() := gsId
prober.cmd.evictClean := False
prober.cmd.evictWay.removeAssignments() := backendWayId
loopback.fifo.io.push.valid := isValid && redoUpA
loopback.fifo.io.push.payload := CTRL_CMD
redoUpA setWhen(!CTRL_CMD.probed && preCtrl.FROM_A && !prober.cmd.ready && firstCycle)
assert(!(isValid && redoUpA && !loopback.fifo.io.push.ready))
val doIt = isFireing && !isRemoved
val olderWay = new Area{
val plru = new Plru(cacheWays, false)
plru.io.context.state := CACHE_PLRU
cache.plru.write.valid := CACHE_HIT
plru.io.update.id := CACHE_HIT_WAY_ID
when(askAllocate) {
cache.plru.write.valid := True
plru.io.update.id := plru.io.evict.id
}
cache.plru.write.valid clearWhen (!isFireing)
cache.plru.write.address := CTRL_CMD.address(setsRange)
cache.plru.write.data := plru.io.update.state
val unlocked = CombInit(!preCtrl.GS_HIT) //Pessimistic, as a way check could help reduce conflicts
val wayId = CombInit(plru.io.evict.id)
val tags = CACHE_TAGS(wayId)
val address = tags.tag @@ CTRL_CMD.address(setsRange) @@ U(0, log2Up(blockSize) bits)
}
backendWayId := CACHE_HIT_WAY_ID | olderWay.wayId.andMask(askAllocate)
cache.tags.write.valid := tags.CACHE_HITS.orR || askAllocate
cache.tags.write.address := CTRL_CMD.address(setsRange)
cache.tags.write.mask := tags.CACHE_HITS | UIntToOh(olderWay.wayId).andMask(askAllocate)
cache.tags.write.data.loaded := True
cache.tags.write.data.tag := CTRL_CMD.address(tagRange)
cache.tags.write.data.dirty := CACHE_LINE.dirty && !askAllocate
cache.tags.write.data.trunk := CACHE_LINE.trunk
val owners = new Area {
val add, remove, clean = False
val next = (CACHE_LINE.owners.andMask(!clean) | inserter.SOURCE_OH.andMask(add)) & ~inserter.SOURCE_OH.andMask(remove)
cache.tags.write.data.owners.removeAssignments() := next
clean setWhen(CTRL_CMD.probed && CTRL_CMD.probedUnique)
}
when(isValid && askGs && !redoUpA && !stallIt) {
gotGs := True
gs.slots.onMask(gsOh) { s =>
s.address := gsAddress
when(firstCycle) {
s.pending.victim := gsPendingVictim
s.pending.victimRead := gsPendingVictimReadWrite
s.pending.victimWrite := gsPendingVictimReadWrite
s.pending.primary := gsPendingPrimary
s.pending.acquire := preCtrl.ACQUIRE
}
when(isReady) {
s.valid := True
}
}
}
gotGs clearWhen (isFireing)
toReadBackend.address := CTRL_CMD.address
toReadBackend.size := CTRL_CMD.size
toReadBackend.gsId := gsId
toReadBackend.wayId := backendWayId
toReadBackend.toUpD := False
toReadBackend.upD.opcode.assignDontCare()
toReadBackend.upD.param := 0
toReadBackend.upD.source := CTRL_CMD.source
toReadBackend.toWriteBackend := False
toWriteBackend.toDownA := False
toWriteBackend.fromUpA := preCtrl.IS_PUT
toWriteBackend.fromUpC := CTRL_CMD.withDataUpC
toWriteBackend.address := CTRL_CMD.address
toWriteBackend.size := CTRL_CMD.size
toWriteBackend.gsId := gsId
toWriteBackend.bufferAId := CTRL_CMD.bufferAId
toWriteBackend.partialUpA := CTRL_CMD.opcode === CtrlOpcode.PUT_PARTIAL_DATA
toWriteBackend.wayId := backendWayId
toWriteBackend.source := CTRL_CMD.source
toWriteBackend.toT := True
toWriteBackend.toUpD := Cache.ToUpDOpcode.NONE()
toWriteBackend.evict := False
toWriteBackend.debugId := CTRL_CMD.debugId
toReadDown.gsId := gsId
toReadDown.address := CTRL_CMD.address
toReadDown.size := CTRL_CMD.size
when(preCtrl.ALLOCATE_ON_MISS){
toReadDown.address(log2Up(blockSize)-1 downto 0) := 0
toReadDown.size := log2Up(blockSize)
}
val ctxDownDWritten = RegInit(False) setWhen (gs.ctxDownD.write.valid) clearWhen (isFireing)
val ctxDownD = gs.ctxDownD.write
ctxDownD.valid := isValid && askGs && !redoUpA && !stallIt && !ctxDownDWritten
ctxDownD.address := gsId
ctxDownD.data.toUpD := False
ctxDownD.data.toCache := False
ctxDownD.data.toProbe := False
ctxDownD.data.toT := True
ctxDownD.data.acquire := preCtrl.ACQUIRE
ctxDownD.data.release := preCtrl.IS_RELEASE
ctxDownD.data.mergeBufferA := False
ctxDownD.data.bufferAId := CTRL_CMD.bufferAId
ctxDownD.data.wordOffset := CTRL_CMD.address(wordRange)
ctxDownD.data.setId := CTRL_CMD.address(setsRange)
ctxDownD.data.size := CTRL_CMD.size
ctxDownD.data.sourceId := CTRL_CMD.source
ctxDownD.data.wayId := backendWayId
toOrdering.debugId := CTRL_CMD.debugId
toOrdering.bytes := (U(1) << CTRL_CMD.size).resized
toOrdering >> io.ordering.ctrlProcess
//Generate a victim
when(askAllocate && olderWay.tags.loaded){
when(olderWay.tags.owners.orR) {
askProbe := True
gsPendingVictim := True
} otherwise {
toReadBackend.toWriteBackend := True
}
when(olderWay.tags.dirty || olderWay.tags.trunk) {
askReadBackend := True //TODO Seems like this would not be necessary if only olderWay.tags.trunk is set; it should only be needed on dirty
gsPendingVictim := True
gsPendingVictimReadWrite := True
}
toReadBackend.address := olderWay.address
toReadBackend.size := log2Up(blockSize)
toReadBackend.wayId := olderWay.wayId
prober.cmd.opcode := CtrlOpcode.EVICT
prober.cmd.address := olderWay.address
prober.cmd.size := log2Up(blockSize)
prober.cmd.mask := olderWay.tags.owners
prober.cmd.probeToN := True
prober.cmd.evictClean := !olderWay.tags.dirty
when(!olderWay.unlocked){
//Assume it comes from A (inclusive)
assert(!isValid || preCtrl.FROM_A)
redoUpA := True
}
}
assert(!(isValid && CTRL_CMD.probed && askAllocate))
when(preCtrl.IS_EVICT){
askWriteBackend := True
toWriteBackend.evict := True
toWriteBackend.toDownA := True
toWriteBackend.size := log2Up(blockSize)
}
if(withFlush) when(preCtrl.IS_FLUSH){
gsPendingPrimary := False
cache.tags.write.data.loaded := False
when(CACHE_HIT) {
when(CACHE_LINE.owners.orR) {
askProbe := True
gsPendingVictim := True
} otherwise {
toReadBackend.toWriteBackend := True
}
when(CACHE_LINE.dirty || CACHE_LINE.trunk) {
askReadBackend := True
gsPendingVictim := True
gsPendingVictimReadWrite := True
}
prober.cmd.opcode := CtrlOpcode.EVICT
prober.cmd.mask := CACHE_LINE.owners
prober.cmd.probeToN := True
prober.cmd.evictClean := !CACHE_LINE.dirty
}
when(doIt){
flush.fsm.inflight.decrementIt := True
}
}
//May not be a CACHE_HIT
when(preCtrl.FROM_C_RELEASE){
//Update tags
owners.remove setWhen (CTRL_CMD.toNone)
cache.tags.write.valid := True
cache.tags.write.data.trunk := False
//Write to backend
when(preCtrl.WRITE_DATA){
askGs := True
askWriteBackend := True
cache.tags.write.data.dirty := True
gsWrite := True
when(CACHE_HIT){
toWriteBackend.toUpD := Cache.ToUpDOpcode.RELEASE_ACK()
} otherwise {
toWriteBackend.toDownA := True
ctxDownD.data.toUpD := True
}
} otherwise {
askUpD := True
toUpD.opcode := Opcode.D.RELEASE_ACK
}
}
when(preCtrl.IS_RELEASE){
when(isFireing){
for(s <- prober.slots){
when(s.address === CTRL_CMD.address(blockRange)){
when(preCtrl.WRITE_DATA) {
s.evictClean := True
}
}
}
}
}
val getPutNeedProbe = CACHE_HIT && ANY && !CTRL_CMD.probed && (!preCtrl.IS_GET || CACHE_LINE.trunk)
when(preCtrl.GET_PUT){
gsWrite := !preCtrl.IS_GET
owners.clean setWhen(preCtrl.IS_PUT)
cache.tags.write.data.trunk := False
//Ensure that the cached owners are cleared on PUT
when(CACHE_HIT){
cache.tags.write.valid := True
}
when(getPutNeedProbe){
askProbe := True
prober.cmd.mask := CACHE_LINE.owners
prober.cmd.probeToN := !preCtrl.IS_GET
//TODO ensure that once the probe is done, the initial request isn't overtaken by another one (ex acquire)
} otherwise {
when(CACHE_HIT) {
events.getPut.hit setWhen(doIt)
when(CTRL_CMD.withDataUpC){
askWriteBackend := True
cache.tags.write.data.dirty := True
} otherwise {
askOrdering := True
askReadBackend := preCtrl.IS_GET
toReadBackend.toUpD := True
toReadBackend.upD.opcode := Opcode.D.ACCESS_ACK_DATA
when(preCtrl.IS_PUT) {
askWriteBackend := True
cache.tags.write.data.dirty := True
}
}
toWriteBackend.toUpD := preCtrl.IS_GET.mux(
Cache.ToUpDOpcode.ACCESS_ACK_DATA(),
Cache.ToUpDOpcode.ACCESS_ACK()
)
}.elsewhen(preCtrl.ALLOCATE_ON_MISS) {
events.getPut.miss setWhen (doIt)
askOrdering := True
askAllocate := True
ctxDownD.data.toCache := True
when(preCtrl.IS_PUT_FULL_BLOCK){
askWriteBackend := True
toWriteBackend.toUpD := Cache.ToUpDOpcode.ACCESS_ACK
} otherwise {
askReadDown := True
}
gsRefill := True
when(preCtrl.IS_GET) {
ctxDownD.data.toUpD := True
}
when(preCtrl.IS_PUT) {
ctxDownD.data.mergeBufferA := True
cache.tags.write.data.dirty := True
}
}.otherwise {
askOrdering := True
askReadDown := preCtrl.IS_GET
toWriteBackend.toDownA := True
askWriteBackend := !preCtrl.IS_GET
ctxDownD.data.toUpD := True
}
}
}
val aquireToB = !CTRL_CMD.toTrunk && OTHER
val acquireParam = aquireToB.mux[Bits](Param.Cap.toB, Param.Cap.toT)
when(preCtrl.ACQUIRE){
when(!CACHE_HIT){
events.acquire.miss setWhen (doIt)
owners.clean := True
owners.add := True
askOrdering := True
askAllocate := True
ctxDownD.data.toUpD := True
ctxDownD.data.toCache := True
ctxDownD.data.toT := !aquireToB
cache.tags.write.data.trunk := !aquireToB
when(CTRL_CMD.opcode === CtrlOpcode.ACQUIRE_BLOCK) {
askReadDown := True
gsRefill := True
} otherwise {
askUpD := True
toUpD.opcode := Opcode.D.GRANT
clearPrimary := True
}
}otherwise{
//Need probing ?
when(!CTRL_CMD.probed && (CACHE_LINE.trunk || CTRL_CMD.toTrunk && OTHER)) {
askProbe := True
prober.cmd.mask := OTHERS
prober.cmd.probeToN := CTRL_CMD.toTrunk
} otherwise {
events.acquire.hit setWhen (doIt)
when(aquireToB) {
ctxDownD.data.toT := False
} otherwise {
owners.clean := True
}
owners.add := True
cache.tags.write.data.trunk := !aquireToB
//TODO warning: gs may complete before writeBackend is done!
when(CTRL_CMD.withDataUpC) {
askWriteBackend := True
cache.tags.write.data.dirty := True
}
when(CTRL_CMD.opcode === CtrlOpcode.ACQUIRE_BLOCK && !SELF) {
toReadBackend.toUpD := True
toReadBackend.upD.opcode := Opcode.D.GRANT_DATA
toReadBackend.upD.param := acquireParam.resized
toWriteBackend.toT := !aquireToB
toWriteBackend.toUpD := Cache.ToUpDOpcode.GRANT_DATA()
when(!CTRL_CMD.withDataUpC){
askOrdering := True
askReadBackend := True
}
} otherwise {
askOrdering := True
askUpD := True
clearPrimary := True
toUpD.opcode := Opcode.D.GRANT
toUpD.param := acquireParam.resized
assert(!(isValid && CTRL_CMD.withDataUpC))
}
}
}
}
when(!doIt){
cache.tags.write.valid := False
}
when(!preCtrl.PROBE_REGION){
assert(!(isValid && preCtrl.ACQUIRE))
askProbe := False
}
}
}
val readDown = new Area {
val cmd = ctrl.process.toReadDown.pipelined(m2s = true, s2m = true)
val toDownA = cmd.swapPayload(io.down.a.payloadType())
toDownA.opcode := Opcode.A.GET
toDownA.param := 0
toDownA.source := U"0" @@ cmd.gsId
toDownA.address := cmd.address
toDownA.size := cmd.size
toDownA.mask.assignDontCare()
toDownA.data.assignDontCare()
toDownA.corrupt := False
toDownA.debugId := DebugId.withPostfix(toDownA.source)
}
case class PutMergeCmd() extends Bundle{
val gsId = GS_ID()
val setId = UInt(setsRange.size bits)
val wayId = UInt(log2Up(cacheWays) bits)
val wordOffset = UInt(wordRange.size bits)
val bufferAId = BUFFER_A_ID()
val source = ubp.source()
val size = ubp.size()
}
/*
Victim buffer will halt :
- writeBackend from reading victim
- writeBackend from writing cache
- fromDownD from writing cache
*/
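// Each of those halts is implemented as a victimHazard term combining the gs pending
// victimRead/victimWrite flags with an address match (victimAddress) against the victim transfer
// currently in flight in the readBackend pipeline.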
val readBackend = new Pipeline {
val stages = newChained(p.readProcessAt+1, Connection.M2S(collapse = true))
val inserterStage = stages(0)
val fetchStage = stages(0)
val readStage = stages(1)
val processStage = stages(p.readProcessAt)
val CMD = Stageable(new ReadBackendCmd())
def victimAddress(stage : Stage) = stage(CMD).gsId @@ stage(CMD).address(wordRange)
val inserter = new Area {
import inserterStage._
val cmd = ctrl.process.toReadBackend.pipelined(m2s = true, s2m = true)
val counter = Reg(io.up.p.beat()) init (0)
val LAST = insert(counter === sizeToBeatMinusOne(io.up.p, cmd.size))
val FIRST = insert(counter === 0)
val WRITE_FORK = insert(CMD.toWriteBackend && FIRST)
cmd.ready := isReady && LAST
valid := cmd.valid
inserterStage(CMD) := cmd.payload
CMD.address.removeAssignments() := cmd.address | (counter << log2Up(p.dataBytes)).resized
when(isFireing) {
counter := counter + 1
when(LAST) {
counter := 0
}
}
}
val fetcher = new Area {
import fetchStage._
cache.data.read.valid := isFireing
cache.data.read.payload := CMD.wayId @@ CMD.address(setsRange.high downto wordRange.low)
when(isFireing && CMD.toVictim && inserter.FIRST) {
gs.slots.onSel(CMD.gsId) { s =>
s.pending.victimRead := False
}
}
}
val CACHED = readStage.insert(Vec(cache.data.banks.map(_.readed))) //May want KEEP attribute
val process = new Area {
import processStage._
val DATA = insert(CACHED(CMD.address(log2Up(p.dataBytes), log2Up(cacheBanks) bits)))
val toUpDFork = forkStream(CMD.toUpD)
val toUpD = toUpDFork swapPayload io.up.d.payloadType()
toUpD.opcode := CMD.upD.opcode
toUpD.param := CMD.upD.param
toUpD.source := CMD.upD.source
toUpD.sink := CMD.gsId
toUpD.size := CMD.size
toUpD.denied := False
toUpD.data := DATA
toUpD.corrupt := False
val toVictimFork = forkFlow(CMD.toVictim)
victimBuffer.write.valid := toVictimFork.valid
victimBuffer.write.address := victimAddress(processStage)
victimBuffer.write.data := DATA
val gsOh = UIntToOh(CMD.gsId, generalSlotCount)
when(isFireing && CMD.toVictim && inserter.FIRST) {
gs.slots.onMask(gsOh) { s =>
s.pending.victimWrite := False
}
}
when(isFireing && inserter.LAST) {
when(CMD.toUpD) {
gs.slots.onMask(gsOh)(_.pending.primary := False)
}
}
val toWriteBackendFork = forkStream(inserter.WRITE_FORK)
val toWriteBackend = toWriteBackendFork.swapPayload(new WriteBackendCmd())
toWriteBackend.fromUpA := False
toWriteBackend.fromUpC := False
toWriteBackend.toDownA := True
toWriteBackend.toUpD := Cache.ToUpDOpcode.NONE
toWriteBackend.toT := True
toWriteBackend.source := 0
toWriteBackend.gsId := CMD.gsId
toWriteBackend.partialUpA := False
toWriteBackend.address := CMD.address
toWriteBackend.size := log2Up(blockSize)
toWriteBackend.wayId := 0
toWriteBackend.bufferAId := 0
toWriteBackend.evict := True
toWriteBackend.debugId := 0
}
}
val writeBackend = new Pipeline {
val stages = newChained(3, Connection.M2S())
val inserterStage = stages(0)
val fetchStage = stages(0)
val readStage = stages(1)
val processStage = stages(2)
val CMD = Stageable(new WriteBackendCmd())
// Put merge is used on an upA put which triggers a cache line load
val putMerges = ubp.withDataA generate new Area {
val push = Stream(PutMergeCmd())
val fifo = StreamFifo(PutMergeCmd(), Math.min(generalSlotCount, aBufferCount))
fifo.io.push << push
val buffered = fifo.io.pop.combStage()
val cmd = buffered.swapPayload(CMD())
cmd.fromUpA := True
cmd.toDownA := False
cmd.toUpD := Cache.ToUpDOpcode.ACCESS_ACK
cmd.gsId := buffered.gsId
cmd.partialUpA := False
cmd.address.assignDontCare()
cmd.address(setsRange) := buffered.setId
cmd.address(wordRange) := buffered.wordOffset
cmd.size := buffered.size
cmd.wayId := buffered.wayId
cmd.bufferAId := buffered.bufferAId
cmd.fromUpC := False
cmd.toT := True
cmd.source := buffered.source
cmd.evict := False
}
val inserter = new Area {
import inserterStage._
val ctrlBuffered = ctrl.process.toWriteBackend
val fromReadBackend = readBackend.process.toWriteBackend.s2mPipe().queue(generalSlotCount).halfPipe()//TODO not that great for area
val arbiterInputs = ArrayBuffer[Stream[WriteBackendCmd]]()
arbiterInputs += ctrlBuffered
if(ubp.withDataA) arbiterInputs += putMerges.cmd
arbiterInputs += fromReadBackend
val arbiter = StreamArbiterFactory().lowerFirst.transactionLock.buildOn(arbiterInputs)
val cmd = arbiter.io.output.pipelined(m2s = true, s2m = true)
val counter = Reg(io.up.p.beat()) init (0)
val upABeatsMinusOne = sizeToBeatMinusOne(io.up.p, cmd.size)
val beatMax = CMD.fromUpC.mux(U(ubp.beatMax-1), upABeatsMinusOne)
val LAST = insert(counter === beatMax)
val addressWord = cmd.address(wordRange)
val IN_UP_A = insert(!CMD.fromUpC || counter >= addressWord && counter <= addressWord + upABeatsMinusOne)
cmd.ready := isReady && LAST
valid := cmd.valid
inserterStage(CMD) := cmd.payload
val addressBase = cmd.address(refillRange) @@ cmd.address(refillRange.low-1 downto 0).andMask(!CMD.fromUpC)
CMD.address.removeAssignments() := addressBase | (counter << log2Up(p.dataBytes)).resized
when(isFireing) {
counter := counter + 1
when(LAST) {
counter := 0
}
}
}
val fetch = new Area{
import fetchStage._
if(ubp.withDataA) {
fromUpA.buffer.read.cmd.valid := fetchStage.isFireing
fromUpA.buffer.read.cmd.payload := fetchStage(CMD).bufferAId @@ fetchStage(CMD).address(wordRange)
when(isFireing && inserter.LAST && CMD.fromUpA) {
fromUpA.buffer.clear(fetchStage(CMD).bufferAId) := True
}
}
victimBuffer.read.cmd.valid := fetchStage.isFireing
victimBuffer.read.cmd.payload := fetchStage(CMD).gsId @@ fetchStage(CMD).address(wordRange)
val vh = readBackend.processStage
val victimWrite = gs.slots.reader(CMD.gsId)(_.pending.victimWrite)
val victimWriteOnGoing = vh.valid &&
vh(readBackend.CMD).toVictim &&
readBackend.victimAddress(vh) === (CMD.gsId @@ CMD.address(wordRange))
val victimHazard = CMD.evict && (victimWrite || victimWriteOnGoing)
haltWhen(victimHazard)
}
val BUFFER_A = ubp.withDataA generate readStage.insert(fromUpA.buffer.read.rsp)
val VICTIM = readStage.insert(victimBuffer.read.rsp)
val process = new Area {
import processStage._
val acMergeLogic = ubp.withDataA generate new Area {
val aSplit = BUFFER_A.data.subdivideIn(8 bits)
val cSplit = upCSplit.dataPop.payload.subdivideIn(8 bits)
val selA = inserter.IN_UP_A && CMD.fromUpA
val result = (0 until dataBytes).map(i => (selA && BUFFER_A.mask(i)).mux(aSplit(i), cSplit(i)))
}
val UP_DATA = insert((CMD.evict && !CMD.fromUpC).mux[Bits](VICTIM, if(ubp.withDataA) acMergeLogic.result.asBits else upCSplit.dataPop.payload))
val UP_MASK = insert(Bits(ubp.dataBytes bits).setAll())
if(ubp.withDataA) when(!(CMD.fromUpC || CMD.evict)){
UP_MASK := BUFFER_A.mask
}
val hazardUpC = CMD.fromUpC && !upCSplit.dataPop.valid
upCSplit.dataPop.ready := CMD.fromUpC && isFireing
val vh = readBackend.fetchStage
val victimRead = gs.slots.reader(CMD.gsId)(_.pending.victimRead)
val victimReadOnGoing = vh.valid &&
vh(readBackend.CMD).toVictim &&
readBackend.victimAddress(vh) === (CMD.gsId @@ CMD.address(wordRange))
val victimHazard = CMD.toCache && (victimRead || victimReadOnGoing)
val toCacheFork = forkStream(CMD.toCache)
cache.data.upWrite.arbitrationFrom(toCacheFork.haltWhen(hazardUpC || victimHazard))
cache.data.upWrite.address := CMD.wayId @@ CMD.address(setsRange.high downto wordRange.low)
cache.data.upWrite.data := UP_DATA
cache.data.upWrite.mask := UP_MASK
val toDownAFork = forkStream(CMD.toDownA)
val toDownA = toDownAFork.haltWhen(hazardUpC).swapPayload(io.down.a.payloadType)
toDownA.opcode := (CMD.fromUpA && CMD.partialUpA).mux(Opcode.A.PUT_PARTIAL_DATA, Opcode.A.PUT_FULL_DATA)
toDownA.param := 0
toDownA.source := U(CMD.evict) @@ CMD.gsId
toDownA.address := CMD.address
toDownA.size := CMD.fromUpC.mux(U(log2Up(blockSize)), CMD.size)
toDownA.data := UP_DATA
toDownA.mask := UP_MASK
toDownA.corrupt := False
toDownA.debugId := DebugId.withPostfix(toDownA.source)
import Cache.ToUpDOpcode._
val needForkToUpD = CMD.toUpD.muxDc[Bool](
NONE -> False,
ACCESS_ACK -> inserter.LAST,
RELEASE_ACK -> inserter.LAST,
GRANT -> inserter.LAST,
ACCESS_ACK_DATA -> inserter.IN_UP_A,
GRANT_DATA -> True
)
val toUpDFork = forkStream(needForkToUpD)
val toUpD = toUpDFork.haltWhen(victimHazard || hazardUpC).swapPayload(io.up.d.payloadType)
toUpD.opcode := CMD.toUpD.muxDc(
ACCESS_ACK -> Opcode.D.ACCESS_ACK(),
ACCESS_ACK_DATA -> Opcode.D.ACCESS_ACK_DATA(),
GRANT -> Opcode.D.GRANT(),
GRANT_DATA -> Opcode.D.GRANT_DATA(),
RELEASE_ACK -> Opcode.D.RELEASE_ACK()
)
toUpD.param := CMD.toT.mux[Bits](Param.Cap.toT, Param.Cap.toB).resized
toUpD.source := CMD.source
toUpD.sink := CMD.gsId
toUpD.size := CMD.size
toUpD.denied := False
toUpD.data := upCSplit.dataPop.payload //as it never comes from an upA put
toUpD.corrupt := False
when(isFireing && inserter.LAST) {
when(CMD.toUpD =/= NONE) {
gs.slots.onSel(CMD.gsId) { s =>
s.pending.primary := False
}
}
}
val toOrdering = Flow(io.ordering.writeBackend.payloadType)
toOrdering.valid := toUpD.fire && toUpD.isLast() && List(ACCESS_ACK, ACCESS_ACK_DATA, GRANT, GRANT_DATA).map(_()).sContains(CMD.toUpD)
toOrdering.debugId := CMD.debugId
toOrdering.bytes := (U(1) << CMD.size).resized
toOrdering >> io.ordering.writeBackend
}
}
val toDownA = new Area{
val arbiter = StreamArbiterFactory().lowerFirst.lambdaLock[ChannelA](_.isLast()).build(io.down.a.payloadType, 2)
arbiter.io.inputs(0) << readDown.toDownA
arbiter.io.inputs(1) << writeBackend.process.toDownA
io.down.a << arbiter.io.output
}
val fromDownD = new Pipeline{
val stages = newChained(3, Connection.M2S())
val inserterStage = stages(0)
val fetchStage = stages(0)
val readStage = stages(1)
val preprocessStage = stages(1)
val processStage = stages(2)
val CTX = Stageable(new CtxDownD())
val inserter = new Area{
import inserterStage._
driveFrom(io.down.d)
val CMD = insert(io.down.d.payload)
val LAST = insert(io.down.d.isLast())
val BEAT = insert(io.down.d.beatCounter())
}
import inserter._
val readPort = gs.ctxDownD.ram.readSyncPort()
readPort.cmd.valid := fetchStage.isFireing
readPort.cmd.payload := fetchStage(CMD).source.resized
readStage(CTX) := readPort.rsp
val preprocess = new Area{
import preprocessStage._
val withData = insert(CMD.opcode === Opcode.D.ACCESS_ACK_DATA)
val toUpDHead = insert(!withData || !CTX.toCache || (BEAT >= CTX.wordOffset && BEAT <= CTX.wordOffset + sizeToBeatMinusOne(io.down.p, CTX.size)))
}
val process = new Area{
import preprocess._
import processStage._
val isVictim = CMD.source.msb
val toCache = forkStream(!isVictim && CTX.toCache).swapPayload(cache.data.downWrite.payloadType)
toCache.address := CTX.wayId @@ CTX.setId @@ BEAT
toCache.data := CMD.data
toCache.mask.setAll()
val gsId = CMD.source.resize(log2Up(gs.slots.size))
val vh = readBackend.fetchStage
val victimRead = gs.slots.reader(gsId)(_.pending.victimWrite) //Here we pick victimWrite to be sure we don't create a deadlock
val victimOnGoing = vh.valid && vh(readBackend.CMD).toVictim && readBackend.victimAddress(vh) === (gsId @@ BEAT)
val victimHazard = victimRead || victimOnGoing
toCache.haltWhen(victimHazard) >-> cache.data.downWrite
//TODO handle refill while partial get to upD
val toUpDFork = forkStream(!isVictim && CTX.toUpD && toUpDHead)
val toUpD = toUpDFork.haltWhen(toCache.valid && victimHazard).swapPayload(io.up.d.payloadType)
toUpD.opcode := CTX.acquire.mux(
withData.mux(Opcode.D.GRANT_DATA, Opcode.D.GRANT),
CTX.release.mux(
Opcode.D.RELEASE_ACK(),
withData.mux(Opcode.D.ACCESS_ACK_DATA, Opcode.D.ACCESS_ACK)
)
)
toUpD.param := CTX.toT.mux[Bits](Param.Cap.toT, Param.Cap.toB).resized
toUpD.source := CTX.sourceId
toUpD.sink := CMD.source.resized
toUpD.size := CTX.size
toUpD.denied := CMD.denied
toUpD.data := CMD.data
toUpD.corrupt := CMD.corrupt
def putMerges = writeBackend.putMerges.push
if(ubp.withDataA) {
putMerges.valid := False
putMerges.gsId := CMD.source.resized
putMerges.setId := CTX.setId
putMerges.wayId := CTX.wayId
putMerges.wordOffset := CTX.wordOffset
putMerges.bufferAId := CTX.bufferAId
putMerges.size := CTX.size
putMerges.source := CTX.sourceId
assert(!writeBackend.putMerges.push.isStall)
}
when(isFireing && LAST) {
when(isVictim) {
gs.slots.onSel(CMD.source.resized)(_.pending.victim := False)
} otherwise {
when(CTX.mergeBufferA) {
if(ubp.withDataA) writeBackend.putMerges.push.valid := True
}.otherwise{
gs.slots.onSel(CMD.source.resized)(_.pending.primary := False)
}
}
}
}
}
val toUpD = new Area{
val arbiter = StreamArbiterFactory().lowerFirst.lambdaLock[ChannelD](_.isLast()).build(io.up.d.payloadType, 4)
arbiter.io.inputs(0) << fromDownD.process.toUpD//.m2sPipe()
arbiter.io.inputs(1) << ctrl.process.toUpD.m2sPipe()
arbiter.io.inputs(2) << readBackend.process.toUpD.s2mPipe()
arbiter.io.inputs(3) << writeBackend.process.toUpD
io.up.d << arbiter.io.output
}
val fromUpE = new Area{
io.up.e.ready := True
when(io.up.e.fire){
gs.slots.onSel(io.up.e.sink)(_.pending.acquire := False)
}
}
ctrl.build()
readBackend.build()
writeBackend.build()
fromDownD.build()
when(!initializer.done) {
cache.tags.write.valid := True
cache.tags.write.address := initializer.counter.resized
cache.tags.write.mask.setAll()
cache.tags.write.data.clearAll()
cache.plru.write.valid := True
cache.plru.write.address := initializer.counter.resized
cache.plru.write.data.clearAll()
}
}
object DirectoryGen extends App{
def basicConfig(generalSlotCount : Int = 8,
probeCount: Int = 4,
downPendingMax: Int = 16,
masterPerChannel: Int = 4,
dataWidth: Int = 64,
addressWidth: Int = 32,
lockSets: Int = 64*1024/64,
cacheBytes : Int = 64*1024,
cacheWays : Int = 8) = {
val blockSize = 64
CacheParam(
unp = NodeParameters(
m = M2sParameters(
addressWidth = addressWidth,
dataWidth = dataWidth,
masters = List.tabulate(masterPerChannel)(mId =>
M2sAgent(
name = null,
mapping = List.fill(1)(M2sSource(
emits = M2sTransfers(
get = SizeRange(64),
putFull = SizeRange(64),
putPartial = SizeRange(64),
acquireT = SizeRange(64),
acquireB = SizeRange(64)
),
id = SizeMapping(mId * 4, 4)
))
)
)
),
s = S2mParameters(List(
S2mAgent(
name = null,
emits = S2mTransfers(
probe = SizeRange(64)
),
sinkId = SizeMapping(0, generalSlotCount)
)
))
),
cacheWays = cacheWays,
cacheBytes = cacheBytes,
aBufferCount = 4,
downPendingMax = downPendingMax,
probeCount = probeCount,
blockSize = blockSize,
coherentRegion = _ => True,
generalSlotCount = generalSlotCount,
allocateOnMiss = (_,_,_,_,_) => True
)
}
SpinalVerilog(new Cache(basicConfig()))
import spinal.lib.eda.bench._
val rtls = ArrayBuffer[Rtl]()
for (probeCount <- List(2)) { //Rtl.ffIo
rtls += Rtl(SpinalVerilog((new Cache(basicConfig(dataWidth = 16, addressWidth = 32, cacheWays = 4,cacheBytes = 128*1024)).setDefinitionName(s"Hub$probeCount"))))
}
val targets = XilinxStdTargets().take(2)
Bench(rtls, targets)
}
/*
tricky cases :
- release while a probe is going on
- release data arriving just before the victim probe logic is enabled => the design may think the data is still in the victim buffer, while it has already been written to memory by the release data
- acquire T, then a release data arrives before the victim of the acquire had time to read the $, so the line gets overridden by the release data
*/