minor update

This commit is contained in:
Blaise Tine 2024-04-30 22:47:59 -07:00
parent aea1d2c8eb
commit 5ea10fd872
9 changed files with 27 additions and 28 deletions

View file

@ -52,7 +52,7 @@ llvm()
# Packages a directory found under $TOOLDIR into libcrt32.tar.bz2 and moves the
# archive into ./libcrt32.
# NOTE(review): this text is a rendered diff hunk without +/- markers, so the
# two tar lines below are presumably the pre-change (libc32) and post-change
# (libcrt32) forms of a single command rather than two commands run back to
# back -- confirm against the real script.
libcrt32()
{
echo "prebuilt libcrt32..."
# -C resolves the member directory relative to $TOOLDIR; -j selects bzip2.
tar -C $TOOLDIR -cvjf libcrt32.tar.bz2 libc32
tar -C $TOOLDIR -cvjf libcrt32.tar.bz2 libcrt32
# Create the destination folder if needed, then move the archive into it.
mkdir -p ./libcrt32
mv libcrt32.tar.bz2 ./libcrt32
}
@ -60,7 +60,7 @@ libcrt32()
# Packages a directory found under $TOOLDIR into libcrt64.tar.bz2 and moves the
# archive into ./libcrt64.
# NOTE(review): this text is a rendered diff hunk without +/- markers, so the
# two tar lines below are presumably the pre-change (libc64) and post-change
# (libcrt64) forms of a single command rather than two commands run back to
# back -- confirm against the real script.
libcrt64()
{
echo "prebuilt libcrt64..."
# -C resolves the member directory relative to $TOOLDIR; -j selects bzip2.
tar -C $TOOLDIR -cvjf libcrt64.tar.bz2 libc64
tar -C $TOOLDIR -cvjf libcrt64.tar.bz2 libcrt64
# Create the destination folder if needed, then move the archive into it.
mkdir -p ./libcrt64
mv libcrt64.tar.bz2 ./libcrt64
}

View file

@ -384,27 +384,27 @@ module VX_decode #(
end
`ifdef FLEN_64
5'b01000: begin
// CVT.S.D, CVT.D.S
// FCVT.S.D, FCVT.D.S
op_type = `INST_OP_BITS'(`INST_FPU_F2F);
`USED_FREG (rd);
`USED_FREG (rs1);
end
`endif
5'b01011: begin
// SQRT
// FSQRT
op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
`USED_FREG (rd);
`USED_FREG (rs1);
end
5'b10100: begin
// CMP
// FCMP
op_type = `INST_OP_BITS'(`INST_FPU_CMP);
`USED_IREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
5'b11000: begin
// CVT.W.X, CVT.WU.X
// FCVT.W.X, FCVT.WU.X
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_F2U) : `INST_OP_BITS'(`INST_FPU_F2I);
`ifdef XLEN_64
imm[1] = rs2[1]; // is 64-bit integer
@ -413,7 +413,7 @@ module VX_decode #(
`USED_FREG (rs1);
end
5'b11010: begin
// CVT.X.W, CVT.X.WU
// FCVT.X.W, FCVT.X.WU
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_U2F) : `INST_OP_BITS'(`INST_FPU_I2F);
`ifdef XLEN_64
imm[1] = rs2[1]; // is 64-bit integer

View file

@ -82,12 +82,12 @@ Emulator::~Emulator() {
}
void Emulator::clear() {
uint32_t startup_addr = dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR0);
uint64_t startup_addr = dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR0);
#if (XLEN == 64)
startup_addr |= (uint64_t(dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR1)) << 32);
#endif
uint32_t startup_arg = dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ARG0);
uint64_t startup_arg = dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ARG0);
#if (XLEN == 64)
startup_arg |= (uint64_t(dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ARG1)) << 32);
#endif

View file

@ -38,9 +38,9 @@ struct LsuTraceData : public ITraceData {
// Trace-data record derived from ITraceData that stores two argument values.
// NOTE(review): this text is a rendered diff hunk without +/- markers, so the
// uint32_t and Word declarations below are presumably the pre-change and
// post-change versions of the same members/constructor, not duplicates that
// coexist in the real header -- confirm against the actual file.
struct SFUTraceData : public ITraceData {
// Shared-ownership handle type for this record.
using Ptr = std::shared_ptr<SFUTraceData>;
uint32_t arg1;
uint32_t arg2;
SFUTraceData(uint32_t arg1, uint32_t arg2) : arg1(arg1), arg2(arg2) {}
// Word is declared outside this view; presumably it is the XLEN-sized
// integer type, so these fields follow the target width -- TODO confirm.
Word arg1;
Word arg2;
// Copies both constructor arguments into the same-named members.
SFUTraceData(Word arg1, Word arg2) : arg1(arg1), arg2(arg2) {}
};
struct instr_trace_t {

View file

@ -110,7 +110,6 @@ void ProcessorImpl::reset() {
perf_mem_writes_ = 0;
perf_mem_latency_ = 0;
perf_mem_pending_reads_ = 0;
}
void ProcessorImpl::dcr_write(uint32_t addr, uint32_t value) {

View file

@ -61,7 +61,7 @@ int main(int argc, char **argv)
*h_X,
*h_T;
const unsigned int optionCount = 64*64;
const unsigned int optionCount = 16*16; //64*64;
const float R = 0.02f;
const float V = 0.30f;
@ -70,7 +70,7 @@ int main(int argc, char **argv)
// Get the NVIDIA platform
ciErrNum = oclGetPlatformID(&cpPlatform);
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
shrLog("clGetPlatformID...\n");
shrLog("clGetPlatformID...\n");
//Get all the devices
cl_uint uiNumDevices = 0; // Number of devices available
@ -84,20 +84,20 @@ int main(int argc, char **argv)
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
// Get command line device options and config accordingly
shrLog(" # of Devices Available = %u\n", uiNumDevices);
if(shrGetCmdLineArgumentu(argc, (const char**)argv, "device", &uiTargetDevice)== shrTRUE)
shrLog(" # of Devices Available = %u\n", uiNumDevices);
if(shrGetCmdLineArgumentu(argc, (const char**)argv, "device", &uiTargetDevice)== shrTRUE)
{
uiTargetDevice = CLAMP(uiTargetDevice, 0, (uiNumDevices - 1));
}
shrLog(" Using Device %u: ", uiTargetDevice);
shrLog(" Using Device %u: ", uiTargetDevice);
oclPrintDevName(LOGBOTH, cdDevices[uiTargetDevice]);
ciErrNum = clGetDeviceInfo(cdDevices[uiTargetDevice], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(uiNumComputeUnits), &uiNumComputeUnits, NULL);
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
shrLog("\n # of Compute Units = %u\n", uiNumComputeUnits);
shrLog("\n # of Compute Units = %u\n", uiNumComputeUnits);
// set logfile name and start logs
shrSetLogFileName ("oclBlackScholes.txt");
shrLog("%s Starting...\n\n", argv[0]);
shrLog("%s Starting...\n\n", argv[0]);
shrLog("Allocating and initializing host memory...\n");
h_CallCPU = (float *)malloc(optionCount * sizeof(float));
@ -191,7 +191,7 @@ int main(int argc, char **argv)
//Calculate performance metrics by wallclock time
double gpuTime = shrDeltaT(0) / numIterations;
shrLogEx(LOGBOTH | MASTER, 0, "oclBlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u options, NumDevsUsed = %i, Workgroup = %u\n",
shrLogEx(LOGBOTH | MASTER, 0, "oclBlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u options, NumDevsUsed = %i, Workgroup = %u\n",
(double)(2.0 * optionCount * 1.0e-9)/gpuTime, gpuTime, (2 * optionCount), 1, 0);
//Get profiling info
@ -208,7 +208,7 @@ int main(int argc, char **argv)
ciErrNum = clEnqueueReadBuffer(cqCommandQueue, d_Put, CL_TRUE, 0, optionCount * sizeof(float), h_PutGPU, 0, NULL, NULL);
oclCheckError(ciErrNum, CL_SUCCESS);
shrLog("Comparing against Host/C++ computation...\n");
shrLog("Comparing against Host/C++ computation...\n");
BlackScholesCPU(h_CallCPU, h_PutCPU, h_S, h_X, h_T, R, V, optionCount);
double deltaCall = 0, deltaPut = 0, sumCall = 0, sumPut = 0;
double L1call, L1put;
@ -219,7 +219,7 @@ int main(int argc, char **argv)
deltaCall += fabs(h_CallCPU[i] - h_CallGPU[i]);
deltaPut += fabs(h_PutCPU[i] - h_PutGPU[i]);
}
L1call = deltaCall / sumCall;
L1call = deltaCall / sumCall;
L1put = deltaPut / sumPut;
shrLog("Relative L1 (call, put) = (%.3e, %.3e)\n\n", L1call, L1put);

View file

@ -332,9 +332,9 @@ void kernel_trigo(int task_id, kernel_arg_t* __UNIFORM__ arg) {
auto dst_ptr = (float*)arg->dst_addr;
auto offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
auto a = sinf(src0_ptr[offset+i]);
auto b = cosf(src1_ptr[offset+i]);
dst_ptr[offset+i] = a + b;
auto a = src0_ptr[offset+i];
auto b = src1_ptr[offset+i];
dst_ptr[offset+i] = sinf(a * b);
}
}

View file

@ -752,7 +752,7 @@ public:
auto b = (float*)src2;
auto c = (float*)dst;
for (uint32_t i = 0; i < n; ++i) {
auto ref = sinf(a[i]) + cosf(b[i]);
auto ref = sinf(a[i] * b[i]);
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;

View file

@ -70,7 +70,7 @@ void gen_src_data(std::vector<TYPE>& src_data, uint32_t size) {
auto r = static_cast<float>(std::rand()) / RAND_MAX;
auto value = static_cast<TYPE>(r * size);
src_data[i] = value;
std::cout << std::dec << i << ": value=" << value << std::endl;
//std::cout << std::dec << i << ": value=" << value << std::endl;
}
}