Merge pull request #3029 from BOINC/knr_gpu_peak_flops_fix

FLOPS Fix for Server Release Branch
BOINC · Mar 6, 2019 · ea1a75e · ea1a75e
2 parents 6f9dd8d + e7fcd76
commit ea1a75e
Show file tree

Hide file tree

Showing 6 changed files with 69 additions and 8 deletions.
diff --git a/client/gpu_amd.cpp b/client/gpu_amd.cpp
@@ -377,6 +377,9 @@ void COPROC_ATI::get(
         cc.atirt_detected = atirt_detected;
         cc.device_num = i;
         cc.set_peak_flops();
+        if (cc.bad_gpu_peak_flops("CAL", s)) {
+            warnings.push_back(s);
+        }
         get_available_ati_ram(cc, warnings);
         ati_gpus.push_back(cc);
     }

diff --git a/client/gpu_nvidia.cpp b/client/gpu_nvidia.cpp
@@ -240,6 +240,7 @@ void COPROC_NVIDIA::get(
     char buf[256];
     int j, itemp;
     size_t global_mem = 0;
+    string s;
     COPROC_NVIDIA cc;
 
 #ifdef _WIN32
@@ -444,6 +445,9 @@ void* cudalib = NULL;
         cc.cuda_version = cuda_version;
         cc.device_num = j;
         cc.set_peak_flops();
+        if (cc.bad_gpu_peak_flops("CUDA", s)) {
+            warnings.push_back(s);
+        }
         get_available_nvidia_ram(cc, warnings);
         nvidia_gpus.push_back(cc);
     }

diff --git a/client/gpu_opencl.cpp b/client/gpu_opencl.cpp
@@ -202,6 +202,7 @@ void COPROCS::get_opencl(
     vector<int>devnums_pci_slot_sort;
     vector<OPENCL_DEVICE_PROP>::iterator it;
     int max_other_coprocs = MAX_RSC-1;  // coprocs[0] is reserved for CPU
+    string s;
 
     if (cc_config.no_opencl) {
         return;
@@ -486,6 +487,9 @@ void COPROCS::get_opencl(
                     COPROC_NVIDIA c;
                     c.opencl_prop = prop;
                     c.set_peak_flops();
+                    if (c.bad_gpu_peak_flops("NVIDIA OpenCL", s)) {
+                        warnings.push_back(s);
+                    }
                     prop.peak_flops = c.peak_flops;
                 }
                 if (cuda_match_found) {
@@ -552,6 +556,9 @@ void COPROCS::get_opencl(
                     COPROC_ATI c;
                     c.opencl_prop = prop;
                     c.set_peak_flops();
+                    if (c.bad_gpu_peak_flops("AMD OpenCL", s)) {
+                        warnings.push_back(s);
+                    }
                     prop.peak_flops = c.peak_flops;
                 }
 
@@ -576,6 +583,9 @@ void COPROCS::get_opencl(
                 safe_strcpy(c.version, prop.opencl_driver_version);
 
                 c.set_peak_flops();
+                if (c.bad_gpu_peak_flops("Intel OpenCL", s)) {
+                    warnings.push_back(s);
+                }
                 prop.peak_flops = c.peak_flops;
                 prop.opencl_available_ram = prop.global_mem_size;
 
@@ -606,12 +616,22 @@ void COPROCS::get_opencl(
                 prop.opencl_available_ram = prop.global_mem_size;
                 prop.is_used = COPROC_USED;
 
-                // TODO: Find a better way to calculate / estimate peak_flops for future coprocessors?
+                // TODO: is there a better way to estimate peak_flops?
+                //
                 prop.peak_flops = 0;
                 if (prop.max_compute_units) {
-                    prop.peak_flops = prop.max_compute_units * prop.max_clock_frequency * MEGA;
+                    double freq = ((double)prop.max_clock_frequency) * MEGA;
+                    prop.peak_flops = ((double)prop.max_compute_units) * freq;
+                }
+                if (prop.peak_flops <= 0 || prop.peak_flops > GPU_MAX_PEAK_FLOPS) {
+                    char buf2[256];
+                    sprintf(buf2,
+                        "OpenCL generic: bad peak FLOPS; Max units %d, max freq %d MHz",
+                        prop.max_compute_units, prop.max_clock_frequency
+                    );
+                    warnings.push_back(buf2);
+                    prop.peak_flops = GPU_DEFAULT_PEAK_FLOPS;
                 }
-                if (prop.peak_flops <= 0) prop.peak_flops = 45e9;
 
                 other_opencls.push_back(prop);
             }

diff --git a/lib/coproc.cpp b/lib/coproc.cpp
@@ -600,7 +600,7 @@ void COPROC_NVIDIA::set_peak_flops() {
         //
         x = opencl_prop.max_compute_units * 48 * 2 * opencl_prop.max_clock_frequency * 1e6;
     }
-    peak_flops =  (x>0)?x:5e10;
+    peak_flops = x;
 }
 
 // fake a NVIDIA GPU (for debugging)
@@ -868,7 +868,7 @@ void COPROC_ATI::set_peak_flops() {
         //
         x = opencl_prop.max_compute_units * 16 * 5 * opencl_prop.max_clock_frequency * 1e6;
     }
-    peak_flops = (x>0)?x:5e10;
+    peak_flops = x;
 }
 
 void COPROC_ATI::fake(double ram, double avail_ram, int n) {
@@ -980,7 +980,7 @@ void COPROC_INTEL::set_peak_flops() {
     if (opencl_prop.max_compute_units) {
         x = opencl_prop.max_compute_units * 8 * opencl_prop.max_clock_frequency * 1e6;
     }
-    peak_flops = (x>0)?x:45e9;
+    peak_flops = x;
 }
 
 void COPROC_INTEL::fake(double ram, double avail_ram, int n) {

diff --git a/lib/coproc.h b/lib/coproc.h
@@ -91,6 +91,14 @@
 #define MAX_COPROC_INSTANCES 64
 #define MAX_RSC 8
     // max # of processing resources types
+#define GPU_MAX_PEAK_FLOPS  1.e15
+    // sanity-check bound for peak FLOPS
+    // for now (Feb 2019) 1000 TeraFLOPS.
+    // As of now, the fastest GPU is 20 TeraFLOPS (NVIDIA).
+    // May need to increase this at some point
+#define GPU_DEFAULT_PEAK_FLOPS  100.e9
+    // value to use if sanity check fails
+    // as of now (Feb 2019) 100 GigaFLOPS is a typical low-end GPU
 
 // arguments to proc_type_name() and proc_type_name_xml().
 //
@@ -246,6 +254,21 @@ struct COPROC {
         std::vector<OPENCL_DEVICE_PROP> &opencls,
         std::vector<int>& ignore_dev
     );
+
+    // Sanity-check this coprocessor's peak FLOPS estimate.
+    // If peak_flops is not in (0, GPU_MAX_PEAK_FLOPS] (this includes NaN),
+    // replace it with GPU_DEFAULT_PEAK_FLOPS,
+    // store a description of the problem in msg, and return true.
+    // Otherwise leave peak_flops and msg unchanged and return false.
+    // source: label for the origin of the value (e.g. "CUDA");
+    // used only in the message text.
+    //
+    inline bool bad_gpu_peak_flops(const char* source, std::string& msg) {
+        // written as a negated range test so that NaN
+        // (for which every comparison is false) is rejected too
+        //
+        if (!(peak_flops > 0 && peak_flops <= GPU_MAX_PEAK_FLOPS)) {
+            // peak_flops can be arbitrarily large here
+            // (%f of a huge double needs 300+ characters),
+            // so bound the write to the buffer size.
+            //
+            char buf[512];
+            snprintf(buf, sizeof(buf),
+                "%s reported bad GPU peak FLOPS %f; using %f",
+                source, peak_flops, GPU_DEFAULT_PEAK_FLOPS
+            );
+            msg = buf;
+            peak_flops = GPU_DEFAULT_PEAK_FLOPS;
+            return true;
+        }
+        return false;
+    }
 };
 
 // Based on cudaDeviceProp from /usr/local/cuda/include/driver_types.h

diff --git a/sched/plan_class_spec.cpp b/sched/plan_class_spec.cpp
@@ -181,6 +181,7 @@ bool PLAN_CLASS_SPEC::opencl_check(OPENCL_DEVICE_PROP& opencl_prop) {
 bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKUNIT* wu) {
     COPROC* cpp = NULL;
     bool can_use_multicore = true;
+    string msg;
 
     if (infeasible_random && drand()<infeasible_random) {
         return false;
@@ -594,7 +595,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
             return false;
         }
 
-        cp.set_peak_flops();
+        if (cp.bad_gpu_peak_flops("AMD", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
         gpu_ram = cp.opencl_prop.global_mem_size;
 
         driver_version = 0;
@@ -686,7 +689,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
             }
         }
         gpu_ram = cp.prop.totalGlobalMem;
-        cp.set_peak_flops();
+        if (cp.bad_gpu_peak_flops("NVIDIA", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
 
     // Intel GPU
     //
@@ -705,6 +710,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
         if (min_gpu_ram_mb) {
             gpu_requirements[PROC_TYPE_INTEL_GPU].update(0, min_gpu_ram_mb * MEGA);
         }
+        if (cp.bad_gpu_peak_flops("Intel GPU", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
 
     // custom GPU type
     //
@@ -723,6 +731,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
                 "[version] plan_class_spec: Custom coproc %s found\n", gpu_type
             );
         }
+        if (cpp->bad_gpu_peak_flops("Custom GPU", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
     }
 
     if (opencl) {