Skip to content

Commit

Permalink
Merge pull request #3029 from BOINC/knr_gpu_peak_flops_fix
Browse files Browse the repository at this point in the history
FLOPS Fix for Server Release Branch
  • Loading branch information
lfield authored Mar 6, 2019
2 parents 6f9dd8d + e7fcd76 commit ea1a75e
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 8 deletions.
3 changes: 3 additions & 0 deletions client/gpu_amd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,9 @@ void COPROC_ATI::get(
cc.atirt_detected = atirt_detected;
cc.device_num = i;
cc.set_peak_flops();
if (cc.bad_gpu_peak_flops("CAL", s)) {
warnings.push_back(s);
}
get_available_ati_ram(cc, warnings);
ati_gpus.push_back(cc);
}
Expand Down
4 changes: 4 additions & 0 deletions client/gpu_nvidia.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ void COPROC_NVIDIA::get(
char buf[256];
int j, itemp;
size_t global_mem = 0;
string s;
COPROC_NVIDIA cc;

#ifdef _WIN32
Expand Down Expand Up @@ -444,6 +445,9 @@ void* cudalib = NULL;
cc.cuda_version = cuda_version;
cc.device_num = j;
cc.set_peak_flops();
if (cc.bad_gpu_peak_flops("CUDA", s)) {
warnings.push_back(s);
}
get_available_nvidia_ram(cc, warnings);
nvidia_gpus.push_back(cc);
}
Expand Down
26 changes: 23 additions & 3 deletions client/gpu_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ void COPROCS::get_opencl(
vector<int>devnums_pci_slot_sort;
vector<OPENCL_DEVICE_PROP>::iterator it;
int max_other_coprocs = MAX_RSC-1; // coprocs[0] is reserved for CPU
string s;

if (cc_config.no_opencl) {
return;
Expand Down Expand Up @@ -486,6 +487,9 @@ void COPROCS::get_opencl(
COPROC_NVIDIA c;
c.opencl_prop = prop;
c.set_peak_flops();
if (c.bad_gpu_peak_flops("NVIDIA OpenCL", s)) {
warnings.push_back(s);
}
prop.peak_flops = c.peak_flops;
}
if (cuda_match_found) {
Expand Down Expand Up @@ -552,6 +556,9 @@ void COPROCS::get_opencl(
COPROC_ATI c;
c.opencl_prop = prop;
c.set_peak_flops();
if (c.bad_gpu_peak_flops("AMD OpenCL", s)) {
warnings.push_back(s);
}
prop.peak_flops = c.peak_flops;
}

Expand All @@ -576,6 +583,9 @@ void COPROCS::get_opencl(
safe_strcpy(c.version, prop.opencl_driver_version);

c.set_peak_flops();
if (c.bad_gpu_peak_flops("Intel OpenCL", s)) {
warnings.push_back(s);
}
prop.peak_flops = c.peak_flops;
prop.opencl_available_ram = prop.global_mem_size;

Expand Down Expand Up @@ -606,12 +616,22 @@ void COPROCS::get_opencl(
prop.opencl_available_ram = prop.global_mem_size;
prop.is_used = COPROC_USED;

// TODO: Find a better way to calculate / estimate peak_flops for future coprocessors?
// TODO: is there a better way to estimate peak_flops?
//
prop.peak_flops = 0;
if (prop.max_compute_units) {
prop.peak_flops = prop.max_compute_units * prop.max_clock_frequency * MEGA;
double freq = ((double)prop.max_clock_frequency) * MEGA;
prop.peak_flops = ((double)prop.max_compute_units) * freq;
}
if (prop.peak_flops <= 0 || prop.peak_flops > GPU_MAX_PEAK_FLOPS) {
char buf2[256];
sprintf(buf2,
"OpenCL generic: bad peak FLOPS; Max units %d, max freq %d MHz",
prop.max_compute_units, prop.max_clock_frequency
);
warnings.push_back(buf2);
prop.peak_flops = GPU_DEFAULT_PEAK_FLOPS;
}
if (prop.peak_flops <= 0) prop.peak_flops = 45e9;

other_opencls.push_back(prop);
}
Expand Down
6 changes: 3 additions & 3 deletions lib/coproc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,7 @@ void COPROC_NVIDIA::set_peak_flops() {
//
x = opencl_prop.max_compute_units * 48 * 2 * opencl_prop.max_clock_frequency * 1e6;
}
peak_flops = (x>0)?x:5e10;
peak_flops = x;
}

// fake a NVIDIA GPU (for debugging)
Expand Down Expand Up @@ -868,7 +868,7 @@ void COPROC_ATI::set_peak_flops() {
//
x = opencl_prop.max_compute_units * 16 * 5 * opencl_prop.max_clock_frequency * 1e6;
}
peak_flops = (x>0)?x:5e10;
peak_flops = x;
}

void COPROC_ATI::fake(double ram, double avail_ram, int n) {
Expand Down Expand Up @@ -980,7 +980,7 @@ void COPROC_INTEL::set_peak_flops() {
if (opencl_prop.max_compute_units) {
x = opencl_prop.max_compute_units * 8 * opencl_prop.max_clock_frequency * 1e6;
}
peak_flops = (x>0)?x:45e9;
peak_flops = x;
}

void COPROC_INTEL::fake(double ram, double avail_ram, int n) {
Expand Down
23 changes: 23 additions & 0 deletions lib/coproc.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,14 @@
#define MAX_COPROC_INSTANCES 64
#define MAX_RSC 8
// max # of processing resources types
#define GPU_MAX_PEAK_FLOPS 1.e15
// upper sanity-check bound for a GPU's peak FLOPS:
// 1e15 = 1000 TeraFLOPS (1 PetaFLOPS).
// Chosen in Feb 2019, when the fastest GPU was about 20 TeraFLOPS (NVIDIA).
// May need to be increased as hardware gets faster.
#define GPU_DEFAULT_PEAK_FLOPS 100.e9
// peak FLOPS value substituted when the sanity check fails:
// 100e9 = 100 GigaFLOPS, a typical low-end GPU as of Feb 2019.

// arguments to proc_type_name() and proc_type_name_xml().
//
Expand Down Expand Up @@ -246,6 +254,21 @@ struct COPROC {
std::vector<OPENCL_DEVICE_PROP> &opencls,
std::vector<int>& ignore_dev
);

// Sanity-check this coprocessor's peak_flops value.
//
// If peak_flops is not a positive number, or exceeds GPU_MAX_PEAK_FLOPS,
// replace it with GPU_DEFAULT_PEAK_FLOPS, store a description of the
// problem in msg, and return true.
// Otherwise leave peak_flops and msg untouched and return false.
//
// source: label naming where the value came from (e.g. "CUDA");
//     it is copied into the message.
//
inline bool bad_gpu_peak_flops(const char* source, std::string& msg) {
    // !(x > 0) rather than (x <= 0) so that NaN is also rejected:
    // every ordered comparison with NaN is false, so NaN would
    // otherwise pass both range tests.
    if (!(peak_flops > 0) || peak_flops > GPU_MAX_PEAK_FLOPS) {
        char buf[256];
        // Bounded write: a wildly out-of-range value printed with %f
        // can expand to hundreds of digits (e.g. 1e300),
        // which would overflow buf if written with sprintf().
        snprintf(buf, sizeof(buf),
            "%s reported bad GPU peak FLOPS %f; using %f",
            source, peak_flops, GPU_DEFAULT_PEAK_FLOPS
        );
        msg = buf;
        peak_flops = GPU_DEFAULT_PEAK_FLOPS;
        return true;
    }
    return false;
}
};

// Based on cudaDeviceProp from /usr/local/cuda/include/driver_types.h
Expand Down
15 changes: 13 additions & 2 deletions sched/plan_class_spec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ bool PLAN_CLASS_SPEC::opencl_check(OPENCL_DEVICE_PROP& opencl_prop) {
bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKUNIT* wu) {
COPROC* cpp = NULL;
bool can_use_multicore = true;
string msg;

if (infeasible_random && drand()<infeasible_random) {
return false;
Expand Down Expand Up @@ -594,7 +595,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
return false;
}

cp.set_peak_flops();
if (cp.bad_gpu_peak_flops("AMD", msg)) {
log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
}
gpu_ram = cp.opencl_prop.global_mem_size;

driver_version = 0;
Expand Down Expand Up @@ -686,7 +689,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
}
}
gpu_ram = cp.prop.totalGlobalMem;
cp.set_peak_flops();
if (cp.bad_gpu_peak_flops("NVIDIA", msg)) {
log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
}

// Intel GPU
//
Expand All @@ -705,6 +710,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
if (min_gpu_ram_mb) {
gpu_requirements[PROC_TYPE_INTEL_GPU].update(0, min_gpu_ram_mb * MEGA);
}
if (cp.bad_gpu_peak_flops("Intel GPU", msg)) {
log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
}

// custom GPU type
//
Expand All @@ -723,6 +731,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
"[version] plan_class_spec: Custom coproc %s found\n", gpu_type
);
}
if (cpp->bad_gpu_peak_flops("Custom GPU", msg)) {
log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
}
}

if (opencl) {
Expand Down

0 comments on commit ea1a75e

Please sign in to comment.