| | | | | | | requested parallelism | walltime sum (s) | nb instances | any sync average per thread time (s) | any wait average per thread time (s) | parallelism overhead (%) | local speedup if perfectly balanced | global speedup if perfectly balanced |
start addr | function name | source location | level | ancestor thread num | invoker | parallel or teams | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 |
libqmckl.so.0.0.0:0x3438d | qmckl_compute_dtmp_c_hpc | qmckl_jastrow_champ.c:4124 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 82.931 | 41.936 | 23.092 | 11.705 | 6.081 | 4.230 | 4.018 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 0.0 | 0.133 | 1.581 | 0.690 | 0.390 | 0.370 | 0.550 | 0.0 | 0.133 | 1.581 | 0.690 | 0.390 | 0.370 | 0.550 | 0 | 0.32 | 6.85 | 5.89 | 6.41 | 8.76 | 13.7 | 1.000 | 1.003 | 1.074 | 1.063 | 1.068 | 1.096 | 1.159 | 1.000 | 1.001 | 1.017 | 1.009 | 1.006 | 1.006 | 1.009 |
libqmckl.so.0.0.0:0x33baa | qmckl_compute_tmp_c_hpc | qmckl_jastrow_champ.c:4054 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 43.402 | 21.783 | 11.753 | 6.127 | 3.177 | 2.224 | 1.996 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 0.0 | 29.1 E-3 | 0.589 | 0.364 | 0.206 | 0.205 | 0.230 | 0.0 | 29.1 E-3 | 0.589 | 0.364 | 0.206 | 0.205 | 0.230 | 0 | 0.13 | 5.01 | 5.94 | 6.48 | 9.24 | 11.5 | 1.000 | 1.001 | 1.053 | 1.063 | 1.069 | 1.102 | 1.130 | 1.000 | 1.000 | 1.006 | 1.005 | 1.003 | 1.003 | 1.004 |
libqmckl.so.0.0.0:0x3543c | qmckl_compute_jastrow_champ_factor_een_gl_hpc | qmckl_jastrow_champ.c:4506 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 9.920 | 5.327 | 3.028 | 1.937 | 1.635 | 1.661 | 2.073 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 0.0 | 714 E-6 | 38.3 E-3 | 72.4 E-3 | 84.9 E-3 | 56.4 E-3 | 0.222 | 0.0 | 709 E-6 | 38.2 E-3 | 72.4 E-3 | 84.9 E-3 | 56.4 E-3 | 0.222 | 0 | 0.01 | 1.26 | 3.74 | 5.19 | 3.39 | 10.7 | 1.000 | 1.000 | 1.013 | 1.039 | 1.055 | 1.035 | 1.120 | 1.000 | 1.000 | 1.000 | 1.001 | 1.001 | 1.001 | 1.004 |
libqmckl.so.0.0.0:0x3169c | qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc | qmckl_jastrow_champ.c:3196 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 8.656 | 4.501 | 3.372 | 1.898 | 1.369 | 1.165 | 1.127 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 0.0 | 6.26 E-3 | 0.668 | 0.316 | 0.201 | 87.7 E-3 | 0.113 | 0.0 | 6.25 E-3 | 0.668 | 0.316 | 0.201 | 87.6 E-3 | 0.113 | 0 | 0.14 | 19.8 | 16.6 | 14.7 | 7.53 | 10.1 | 1.000 | 1.001 | 1.247 | 1.200 | 1.172 | 1.081 | 1.112 | 1.000 | 1.000 | 1.007 | 1.004 | 1.003 | 1.001 | 1.002 |
libqmckl.so.0.0.0:0x2fba1 | qmckl_compute_een_rescaled_e_hpc | qmckl_jastrow_champ.c:2970 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 7.802 | 4.446 | 2.816 | 2.081 | 1.450 | 1.262 | 1.328 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 45.2 E-6 | 3.57 E-3 | 0.218 | 0.380 | 0.249 | 0.103 | 81.1 E-3 | 14.9 E-6 | 3.54 E-3 | 0.218 | 0.380 | 0.249 | 0.103 | 81.1 E-3 | 0.00 | 0.08 | 7.73 | 18.3 | 17.2 | 8.14 | 6.11 | 1.000 | 1.001 | 1.084 | 1.224 | 1.207 | 1.089 | 1.065 | 1.000 | 1.000 | 1.002 | 1.005 | 1.004 | 1.002 | 1.001 |
libqmckl.so.0.0.0:0x2bde9 | qmckl_compute_jastrow_champ_factor_ee_gl_hpc | qmckl_jastrow_champ.c:1652 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 5.964 | 2.986 | 1.512 | 0.788 | 0.423 | 0.245 | 0.128 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 0.0 | 1.07 E-3 | 14.2 E-3 | 35.6 E-3 | 47.2 E-3 | 13.4 E-3 | 9.26 E-3 | 0.0 | 1.06 E-3 | 14.2 E-3 | 35.6 E-3 | 47.2 E-3 | 13.4 E-3 | 9.25 E-3 | 0 | 0.04 | 0.94 | 4.52 | 11.2 | 5.47 | 7.24 | 1.000 | 1.000 | 1.009 | 1.047 | 1.126 | 1.058 | 1.078 | 1.000 | 1.000 | 1.000 | 1.000 | 1.001 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x2b871 | qmckl_compute_ee_distance_rescaled_gl_hpc | qmckl_jastrow_champ.c:1984 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 1.856 | 0.934 | 0.494 | 0.266 | 0.185 | 0.193 | 0.191 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 16.9 E-6 | 1.46 E-3 | 19.5 E-3 | 19.4 E-3 | 19.6 E-3 | 13.4 E-3 | 17.9 E-3 | 3.24 E-6 | 1.45 E-3 | 19.5 E-3 | 19.4 E-3 | 19.6 E-3 | 13.4 E-3 | 17.9 E-3 | 0.00 | 0.16 | 3.94 | 7.27 | 10.6 | 6.98 | 9.39 | 1.000 | 1.002 | 1.041 | 1.078 | 1.119 | 1.075 | 1.104 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x2eb59 | qmckl_compute_jastrow_champ_factor_en_gl_hpc | qmckl_jastrow_champ.c:2446 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 1.458 | 0.732 | 0.366 | 0.193 | 0.103 | 58.8 E-3 | 35.8 E-3 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 0.0 | 659 E-6 | 927 E-6 | 9.77 E-3 | 11.4 E-3 | 2.44 E-3 | 6.31 E-3 | 0.0 | 655 E-6 | 922 E-6 | 9.77 E-3 | 11.4 E-3 | 2.44 E-3 | 6.31 E-3 | 0 | 0.09 | 0.25 | 5.06 | 11.0 | 4.15 | 17.6 | 1.000 | 1.001 | 1.003 | 1.053 | 1.124 | 1.043 | 1.214 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x2d7c8 | qmckl_compute_jastrow_champ_factor_en_hpc | qmckl_jastrow_champ.c:2277 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 0.878 | 0.447 | 0.228 | 0.122 | 72.2 E-3 | 40.5 E-3 | 24.4 E-3 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 0.0 | 2.07 E-3 | 5.34 E-3 | 9.80 E-3 | 14.7 E-3 | 4.89 E-3 | 4.86 E-3 | 0.0 | 2.06 E-3 | 5.34 E-3 | 9.80 E-3 | 14.7 E-3 | 4.89 E-3 | 4.86 E-3 | 0 | 0.46 | 2.35 | 8.04 | 20.4 | 12.1 | 19.9 | 1.000 | 1.005 | 1.024 | 1.087 | 1.256 | 1.137 | 1.249 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x2a385 | qmckl_compute_jastrow_champ_factor_ee_hpc | qmckl_jastrow_champ.c:1432 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 0.471 | 0.446 | 0.455 | 0.468 | 0.484 | 0.524 | 0.582 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 0.0 | 94.2 E-6 | 194 E-6 | 784 E-6 | 1.28 E-3 | 2.07 E-3 | 14.2 E-3 | 0.0 | 82.9 E-6 | 183 E-6 | 774 E-6 | 1.26 E-3 | 2.06 E-3 | 14.2 E-3 | 0 | 0.02 | 0.04 | 0.17 | 0.26 | 0.40 | 2.44 | 1.000 | 1.000 | 1.000 | 1.002 | 1.003 | 1.004 | 1.025 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x29f09 | qmckl_compute_ee_distance_rescaled_hpc | qmckl_jastrow_champ.c:1851 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 85.7 E-3 | 43.1 E-3 | 22.0 E-3 | 12.0 E-3 | 8.68 E-3 | 7.68 E-3 | 21.8 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 5.36 E-6 | 62.9 E-6 | 182 E-6 | 584 E-6 | 905 E-6 | 449 E-6 | 9.33 E-3 | 318 E-9 | 55.6 E-6 | 173 E-6 | 576 E-6 | 896 E-6 | 439 E-6 | 9.33 E-3 | 0.01 | 0.15 | 0.83 | 4.87 | 10.4 | 5.85 | 42.8 | 1.000 | 1.001 | 1.008 | 1.051 | 1.116 | 1.062 | 1.749 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x2e32e | qmckl_compute_en_distance_rescaled_gl_hpc | qmckl_jastrow_champ.c:2798 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 44.2 E-3 | 22.4 E-3 | 11.8 E-3 | 6.31 E-3 | 4.08 E-3 | 3.91 E-3 | 3.35 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.50 E-6 | 205 E-6 | 480 E-6 | 489 E-6 | 497 E-6 | 598 E-6 | 330 E-6 | 354 E-9 | 203 E-6 | 479 E-6 | 488 E-6 | 496 E-6 | 597 E-6 | 329 E-6 | 0.00 | 0.91 | 4.08 | 7.75 | 12.2 | 15.3 | 9.86 | 1.000 | 1.009 | 1.043 | 1.084 | 1.138 | 1.181 | 1.109 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x2d0a5 | qmckl_compute_en_distance_rescaled_hpc | qmckl_jastrow_champ.c:2642 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 20.6 E-3 | 10.3 E-3 | 5.20 E-3 | 2.73 E-3 | 1.50 E-3 | 900 E-6 | 1.21 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.27 E-6 | 7.80 E-6 | 16.5 E-6 | 115 E-6 | 176 E-6 | 57.2 E-6 | 567 E-6 | 182 E-9 | 6.93 E-6 | 15.7 E-6 | 115 E-6 | 175 E-6 | 56.6 E-6 | 566 E-6 | 0.01 | 0.08 | 0.32 | 4.22 | 11.7 | 6.35 | 47.0 | 1.000 | 1.001 | 1.003 | 1.044 | 1.133 | 1.068 | 1.888 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |