mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
minor update
This commit is contained in:
parent
aea1d2c8eb
commit
5ea10fd872
9 changed files with 27 additions and 28 deletions
|
@ -52,7 +52,7 @@ llvm()
|
|||
libcrt32()
|
||||
{
|
||||
echo "prebuilt libcrt32..."
|
||||
tar -C $TOOLDIR -cvjf libcrt32.tar.bz2 libc32
|
||||
tar -C $TOOLDIR -cvjf libcrt32.tar.bz2 libcrt32
|
||||
mkdir -p ./libcrt32
|
||||
mv libcrt32.tar.bz2 ./libcrt32
|
||||
}
|
||||
|
@ -60,7 +60,7 @@ libcrt32()
|
|||
libcrt64()
|
||||
{
|
||||
echo "prebuilt libcrt64..."
|
||||
tar -C $TOOLDIR -cvjf libcrt64.tar.bz2 libc64
|
||||
tar -C $TOOLDIR -cvjf libcrt64.tar.bz2 libcrt64
|
||||
mkdir -p ./libcrt64
|
||||
mv libcrt64.tar.bz2 ./libcrt64
|
||||
}
|
||||
|
|
|
@ -384,27 +384,27 @@ module VX_decode #(
|
|||
end
|
||||
`ifdef FLEN_64
|
||||
5'b01000: begin
|
||||
// CVT.S.D, CVT.D.S
|
||||
// FCVT.S.D, FCVT.D.S
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_F2F);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
`endif
|
||||
5'b01011: begin
|
||||
// SQRT
|
||||
// FSQRT
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
5'b10100: begin
|
||||
// CMP
|
||||
// FCMP
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_CMP);
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
5'b11000: begin
|
||||
// CVT.W.X, CVT.WU.X
|
||||
// FCVT.W.X, FCVT.WU.X
|
||||
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_F2U) : `INST_OP_BITS'(`INST_FPU_F2I);
|
||||
`ifdef XLEN_64
|
||||
imm[1] = rs2[1]; // is 64-bit integer
|
||||
|
@ -413,7 +413,7 @@ module VX_decode #(
|
|||
`USED_FREG (rs1);
|
||||
end
|
||||
5'b11010: begin
|
||||
// CVT.X.W, CVT.X.WU
|
||||
// FCVT.X.W, FCVT.X.WU
|
||||
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_U2F) : `INST_OP_BITS'(`INST_FPU_I2F);
|
||||
`ifdef XLEN_64
|
||||
imm[1] = rs2[1]; // is 64-bit integer
|
||||
|
|
|
@ -82,12 +82,12 @@ Emulator::~Emulator() {
|
|||
}
|
||||
|
||||
void Emulator::clear() {
|
||||
uint32_t startup_addr = dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR0);
|
||||
uint64_t startup_addr = dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR0);
|
||||
#if (XLEN == 64)
|
||||
startup_addr |= (uint64_t(dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR1)) << 32);
|
||||
#endif
|
||||
|
||||
uint32_t startup_arg = dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ARG0);
|
||||
uint64_t startup_arg = dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ARG0);
|
||||
#if (XLEN == 64)
|
||||
startup_arg |= (uint64_t(dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ARG1)) << 32);
|
||||
#endif
|
||||
|
|
|
@ -38,9 +38,9 @@ struct LsuTraceData : public ITraceData {
|
|||
|
||||
struct SFUTraceData : public ITraceData {
|
||||
using Ptr = std::shared_ptr<SFUTraceData>;
|
||||
uint32_t arg1;
|
||||
uint32_t arg2;
|
||||
SFUTraceData(uint32_t arg1, uint32_t arg2) : arg1(arg1), arg2(arg2) {}
|
||||
Word arg1;
|
||||
Word arg2;
|
||||
SFUTraceData(Word arg1, Word arg2) : arg1(arg1), arg2(arg2) {}
|
||||
};
|
||||
|
||||
struct instr_trace_t {
|
||||
|
|
|
@ -110,7 +110,6 @@ void ProcessorImpl::reset() {
|
|||
perf_mem_writes_ = 0;
|
||||
perf_mem_latency_ = 0;
|
||||
perf_mem_pending_reads_ = 0;
|
||||
|
||||
}
|
||||
|
||||
void ProcessorImpl::dcr_write(uint32_t addr, uint32_t value) {
|
||||
|
|
|
@ -61,7 +61,7 @@ int main(int argc, char **argv)
|
|||
*h_X,
|
||||
*h_T;
|
||||
|
||||
const unsigned int optionCount = 64*64;
|
||||
const unsigned int optionCount = 16*16; //64*64;
|
||||
const float R = 0.02f;
|
||||
const float V = 0.30f;
|
||||
|
||||
|
@ -70,7 +70,7 @@ int main(int argc, char **argv)
|
|||
// Get the NVIDIA platform
|
||||
ciErrNum = oclGetPlatformID(&cpPlatform);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
|
||||
shrLog("clGetPlatformID...\n");
|
||||
shrLog("clGetPlatformID...\n");
|
||||
|
||||
//Get all the devices
|
||||
cl_uint uiNumDevices = 0; // Number of devices available
|
||||
|
@ -84,20 +84,20 @@ int main(int argc, char **argv)
|
|||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
|
||||
|
||||
// Get command line device options and config accordingly
|
||||
shrLog(" # of Devices Available = %u\n", uiNumDevices);
|
||||
if(shrGetCmdLineArgumentu(argc, (const char**)argv, "device", &uiTargetDevice)== shrTRUE)
|
||||
shrLog(" # of Devices Available = %u\n", uiNumDevices);
|
||||
if(shrGetCmdLineArgumentu(argc, (const char**)argv, "device", &uiTargetDevice)== shrTRUE)
|
||||
{
|
||||
uiTargetDevice = CLAMP(uiTargetDevice, 0, (uiNumDevices - 1));
|
||||
}
|
||||
shrLog(" Using Device %u: ", uiTargetDevice);
|
||||
shrLog(" Using Device %u: ", uiTargetDevice);
|
||||
oclPrintDevName(LOGBOTH, cdDevices[uiTargetDevice]);
|
||||
ciErrNum = clGetDeviceInfo(cdDevices[uiTargetDevice], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(uiNumComputeUnits), &uiNumComputeUnits, NULL);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
|
||||
shrLog("\n # of Compute Units = %u\n", uiNumComputeUnits);
|
||||
shrLog("\n # of Compute Units = %u\n", uiNumComputeUnits);
|
||||
|
||||
// set logfile name and start logs
|
||||
shrSetLogFileName ("oclBlackScholes.txt");
|
||||
shrLog("%s Starting...\n\n", argv[0]);
|
||||
shrLog("%s Starting...\n\n", argv[0]);
|
||||
|
||||
shrLog("Allocating and initializing host memory...\n");
|
||||
h_CallCPU = (float *)malloc(optionCount * sizeof(float));
|
||||
|
@ -191,7 +191,7 @@ int main(int argc, char **argv)
|
|||
|
||||
//Calculate performance metrics by wallclock time
|
||||
double gpuTime = shrDeltaT(0) / numIterations;
|
||||
shrLogEx(LOGBOTH | MASTER, 0, "oclBlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u options, NumDevsUsed = %i, Workgroup = %u\n",
|
||||
shrLogEx(LOGBOTH | MASTER, 0, "oclBlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u options, NumDevsUsed = %i, Workgroup = %u\n",
|
||||
(double)(2.0 * optionCount * 1.0e-9)/gpuTime, gpuTime, (2 * optionCount), 1, 0);
|
||||
|
||||
//Get profiling info
|
||||
|
@ -208,7 +208,7 @@ int main(int argc, char **argv)
|
|||
ciErrNum = clEnqueueReadBuffer(cqCommandQueue, d_Put, CL_TRUE, 0, optionCount * sizeof(float), h_PutGPU, 0, NULL, NULL);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
|
||||
shrLog("Comparing against Host/C++ computation...\n");
|
||||
shrLog("Comparing against Host/C++ computation...\n");
|
||||
BlackScholesCPU(h_CallCPU, h_PutCPU, h_S, h_X, h_T, R, V, optionCount);
|
||||
double deltaCall = 0, deltaPut = 0, sumCall = 0, sumPut = 0;
|
||||
double L1call, L1put;
|
||||
|
@ -219,7 +219,7 @@ int main(int argc, char **argv)
|
|||
deltaCall += fabs(h_CallCPU[i] - h_CallGPU[i]);
|
||||
deltaPut += fabs(h_PutCPU[i] - h_PutGPU[i]);
|
||||
}
|
||||
L1call = deltaCall / sumCall;
|
||||
L1call = deltaCall / sumCall;
|
||||
L1put = deltaPut / sumPut;
|
||||
shrLog("Relative L1 (call, put) = (%.3e, %.3e)\n\n", L1call, L1put);
|
||||
|
||||
|
|
|
@ -332,9 +332,9 @@ void kernel_trigo(int task_id, kernel_arg_t* __UNIFORM__ arg) {
|
|||
auto dst_ptr = (float*)arg->dst_addr;
|
||||
auto offset = task_id * count;
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
auto a = sinf(src0_ptr[offset+i]);
|
||||
auto b = cosf(src1_ptr[offset+i]);
|
||||
dst_ptr[offset+i] = a + b;
|
||||
auto a = src0_ptr[offset+i];
|
||||
auto b = src1_ptr[offset+i];
|
||||
dst_ptr[offset+i] = sinf(a * b);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -752,7 +752,7 @@ public:
|
|||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = sinf(a[i]) + cosf(b[i]);
|
||||
auto ref = sinf(a[i] * b[i]);
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
|
|
|
@ -70,7 +70,7 @@ void gen_src_data(std::vector<TYPE>& src_data, uint32_t size) {
|
|||
auto r = static_cast<float>(std::rand()) / RAND_MAX;
|
||||
auto value = static_cast<TYPE>(r * size);
|
||||
src_data[i] = value;
|
||||
std::cout << std::dec << i << ": value=" << value << std::endl;
|
||||
//std::cout << std::dec << i << ": value=" << value << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue