| | | | | | | requested parallelism | walltime sum (s) | nb instances | any sync average per thread time (s) | any wait average per thread time (s) | parallelism overhead (%) | local speedup if perfectly balanced | global speedup if perfectly balanced |
start addr | function name | source location | level | ancestor thread num | invoker | parallel or teams | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 |
exec:0x40e17b | ljForce | ljForce.c:172 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 241.558 | 121.005 | 60.741 | 30.744 | 15.486 | 9.777 | 7.745 | 101 | 101 | 101 | 101 | 101 | 101 | 101 | 102 E-6 | 61.6 E-3 | 90.4 E-3 | 0.404 | 0.314 | 0.905 | 0.928 | 37.8 E-6 | 61.6 E-3 | 90.4 E-3 | 0.404 | 0.314 | 0.905 | 0.928 | 0.00 | 0.05 | 0.15 | 1.31 | 2.03 | 9.26 | 12.0 | 1.000 | 1.001 | 1.001 | 1.013 | 1.021 | 1.102 | 1.136 | 1.000 | 1.000 | 1.001 | 1.011 | 1.016 | 1.058 | 1.064 |
exec:0x41207d | timestep | timestep.c:152 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 5.282 | 2.748 | 1.412 | 0.736 | 0.419 | 0.732 | 0.944 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 22.0 E-3 | 23.0 E-3 | 27.3 E-3 | 17.0 E-3 | 0.138 | 0.205 | 0.0 | 22.0 E-3 | 23.0 E-3 | 27.3 E-3 | 16.9 E-3 | 0.138 | 0.205 | 0 | 0.80 | 1.63 | 3.71 | 4.05 | 18.8 | 21.7 | 1.000 | 1.008 | 1.017 | 1.039 | 1.042 | 1.232 | 1.277 | 1.000 | 1.000 | 1.000 | 1.001 | 1.001 | 1.008 | 1.013 |
exec:0x411f53 | timestep | timestep.c:85 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 1.861 | 1.007 | 0.541 | 0.287 | 0.176 | 0.363 | 0.545 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 32.1 E-3 | 30.1 E-3 | 17.0 E-3 | 8.76 E-3 | 76.5 E-3 | 0.127 | 0.0 | 32.1 E-3 | 30.1 E-3 | 17.0 E-3 | 8.75 E-3 | 76.5 E-3 | 0.127 | 0 | 3.19 | 5.57 | 5.92 | 4.96 | 21.1 | 23.3 | 1.000 | 1.033 | 1.059 | 1.063 | 1.052 | 1.267 | 1.303 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.005 | 1.008 |
exec:0x41219f | timestep | timestep.c:71 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 1.783 | 0.970 | 0.522 | 0.278 | 0.169 | 0.293 | 0.460 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 34.4 E-3 | 32.1 E-3 | 18.1 E-3 | 9.42 E-3 | 75.6 E-3 | 0.127 | 0.0 | 34.4 E-3 | 32.1 E-3 | 18.1 E-3 | 9.41 E-3 | 75.6 E-3 | 0.127 | 0 | 3.54 | 6.15 | 6.51 | 5.58 | 25.8 | 27.5 | 1.000 | 1.037 | 1.066 | 1.070 | 1.059 | 1.348 | 1.380 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.005 | 1.008 |
exec:0x411ea7 | timestep | timestep.c:71 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 1.778 | 0.967 | 0.521 | 0.278 | 0.166 | 0.335 | 0.512 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 34.2 E-3 | 32.0 E-3 | 17.7 E-3 | 9.29 E-3 | 75.6 E-3 | 0.128 | 0.0 | 34.2 E-3 | 32.0 E-3 | 17.7 E-3 | 9.28 E-3 | 75.6 E-3 | 0.128 | 0 | 3.54 | 6.14 | 6.37 | 5.61 | 22.6 | 25.0 | 1.000 | 1.037 | 1.065 | 1.068 | 1.059 | 1.292 | 1.333 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.005 | 1.008 |
exec:0x40e0ef | ljForce | ljForce.c:157 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 1.047 | 0.659 | 0.381 | 0.201 | 0.168 | 0.991 | 1.129 | 101 | 101 | 101 | 101 | 101 | 101 | 101 | 0.0 | 69.5 E-3 | 52.2 E-3 | 26.5 E-3 | 12.3 E-3 | 88.4 E-3 | 0.186 | 0.0 | 69.5 E-3 | 52.2 E-3 | 26.5 E-3 | 12.3 E-3 | 88.3 E-3 | 0.186 | 0 | 10.5 | 13.7 | 13.2 | 7.33 | 8.92 | 16.5 | 1.000 | 1.118 | 1.159 | 1.152 | 1.079 | 1.098 | 1.197 | 1.000 | 1.001 | 1.001 | 1.001 | 1.001 | 1.005 | 1.012 |
exec:0x40b843 | setTemperature | initAtoms.c:151 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 0.181 | 91.4 E-3 | 46.0 E-3 | 23.7 E-3 | 14.2 E-3 | 10.2 E-3 | 9.07 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 505 E-6 | 203 E-6 | 404 E-6 | 255 E-6 | 1.09 E-3 | 1.57 E-3 | 0.0 | 505 E-6 | 203 E-6 | 404 E-6 | 255 E-6 | 1.09 E-3 | 1.57 E-3 | 0 | 0.55 | 0.44 | 1.71 | 1.80 | 10.5 | 16.6 | 1.000 | 1.006 | 1.004 | 1.017 | 1.018 | 1.117 | 1.200 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x412208 | timestep | timestep.c:107 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 0.112 | 60.7 E-3 | 31.9 E-3 | 16.7 E-3 | 9.57 E-3 | 23.7 E-3 | 37.1 E-3 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 8.33 E-6 | 1.84 E-3 | 1.47 E-3 | 1.02 E-3 | 577 E-6 | 4.79 E-3 | 9.07 E-3 | 3.20 E-6 | 1.83 E-3 | 1.47 E-3 | 1.02 E-3 | 571 E-6 | 4.78 E-3 | 9.06 E-3 | 0.01 | 3.03 | 4.61 | 6.13 | 6.03 | 20.2 | 24.4 | 1.000 | 1.031 | 1.048 | 1.065 | 1.064 | 1.253 | 1.323 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.001 |
exec:0x4049dd | main | initAtoms.c:194 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 59.5 E-3 | 35.1 E-3 | 17.8 E-3 | 9.23 E-3 | 4.67 E-3 | 4.75 E-3 | 5.00 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 2.68 E-3 | 1.19 E-3 | 551 E-6 | 361 E-6 | 831 E-6 | 1.19 E-3 | 0.0 | 2.68 E-3 | 1.19 E-3 | 551 E-6 | 361 E-6 | 831 E-6 | 1.19 E-3 | 0 | 7.63 | 6.66 | 5.97 | 7.73 | 17.5 | 23.8 | 1.000 | 1.083 | 1.071 | 1.063 | 1.084 | 1.212 | 1.313 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x404a27 | main | CoMD.c:206 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 51.7 E-3 | 27.3 E-3 | 14.1 E-3 | 7.32 E-3 | 4.08 E-3 | 7.57 E-3 | 9.04 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 379 E-6 | 383 E-6 | 326 E-6 | 188 E-6 | 1.36 E-3 | 1.99 E-3 | 0.0 | 379 E-6 | 383 E-6 | 326 E-6 | 188 E-6 | 1.36 E-3 | 1.99 E-3 | 0 | 1.38 | 2.72 | 4.46 | 4.60 | 18.0 | 22.1 | 1.000 | 1.014 | 1.028 | 1.047 | 1.048 | 1.219 | 1.283 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x412d98 | kineticEnergy | timestep.c:107 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 31.9 E-3 | 17.7 E-3 | 9.47 E-3 | 4.95 E-3 | 2.81 E-3 | 7.45 E-3 | 10.5 E-3 | 3.00 | 3.00 | 3.00 | 3.00 | 3.00 | 3.00 | 3.00 | 3.08 E-6 | 846 E-6 | 605 E-6 | 363 E-6 | 207 E-6 | 1.48 E-3 | 2.45 E-3 | 1.13 E-6 | 843 E-6 | 603 E-6 | 361 E-6 | 205 E-6 | 1.48 E-3 | 2.45 E-3 | 0.01 | 4.77 | 6.38 | 7.34 | 7.36 | 19.8 | 23.5 | 1.000 | 1.050 | 1.068 | 1.079 | 1.079 | 1.248 | 1.307 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x40af8e | setVcm | initAtoms.c:218 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 10.6 E-3 | 6.02 E-3 | 3.19 E-3 | 1.68 E-3 | 971 E-6 | 2.23 E-3 | 2.89 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 2.76 E-6 | 332 E-6 | 210 E-6 | 135 E-6 | 71.7 E-6 | 447 E-6 | 729 E-6 | 476 E-9 | 330 E-6 | 209 E-6 | 134 E-6 | 70.8 E-6 | 446 E-6 | 728 E-6 | 0.03 | 5.51 | 6.58 | 8.07 | 7.38 | 20.1 | 25.3 | 1.000 | 1.058 | 1.070 | 1.088 | 1.080 | 1.251 | 1.338 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x40b0e5 | setVcm | initAtoms.c:123 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 7.88 E-3 | 4.93 E-3 | 2.64 E-3 | 1.43 E-3 | 820 E-6 | 1.87 E-3 | 3.09 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 466 E-6 | 236 E-6 | 125 E-6 | 74.5 E-6 | 480 E-6 | 1.03 E-3 | 0.0 | 466 E-6 | 236 E-6 | 125 E-6 | 74.4 E-6 | 480 E-6 | 1.03 E-3 | 0 | 9.47 | 8.94 | 8.71 | 9.09 | 25.7 | 33.3 | 1.000 | 1.105 | 1.098 | 1.095 | 1.100 | 1.345 | 1.498 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x40b8fa | setTemperature | initAtoms.c:184 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 4.84 E-3 | 2.69 E-3 | 1.44 E-3 | 838 E-6 | 471 E-6 | 1.32 E-3 | 2.35 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 119 E-6 | 85.6 E-6 | 47.2 E-6 | 28.8 E-6 | 405 E-6 | 838 E-6 | 0.0 | 119 E-6 | 85.5 E-6 | 47.1 E-6 | 28.7 E-6 | 405 E-6 | 837 E-6 | 0 | 4.42 | 5.94 | 5.63 | 6.11 | 30.7 | 35.6 | 1.000 | 1.046 | 1.063 | 1.060 | 1.065 | 1.443 | 1.553 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |