Skip to content

Commit f932ba4

Browse files
tkfvtjnash
authored and committed
Use uv_thread_getaffinity when --threads=auto (JuliaLang#42340)
Co-authored-by: Jameson Nash <[email protected]>
1 parent 50c210a commit f932ba4

File tree

9 files changed

+79
-11
lines changed

9 files changed

+79
-11
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ Compiler/Runtime improvements
1616
Command-line option changes
1717
---------------------------
1818

19+
* On Linux and Windows, `--threads=auto` now tries to infer the usable number of CPUs from the
20+
process affinity, which is typically set in HPC and cloud environments ([#42340]).
1921

2022
Multi-threading changes
2123
-----------------------

doc/man/julia.1

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,13 @@ Load <file> immediately on all processors
103103

104104
.TP
105105
-t, --threads <n>
106-
Enable n threads
106+
Enable n threads; "auto" tries to infer a useful default number
107+
of threads to use but the exact behavior might change in the future.
108+
Currently, "auto" uses the number of CPUs assigned to this julia
109+
process based on the OS-specific affinity assignment interface, if
110+
supported (Linux and Windows). If this is not supported (macOS) or
111+
process affinity is not configured, it uses the number of CPU
112+
threads.
107113

108114
.TP
109115
-p, --procs <n>

doc/src/manual/command-line-options.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ The following is a complete list of command-line switches available when launchi
8989
|`-e`, `--eval <expr>` |Evaluate `<expr>`|
9090
|`-E`, `--print <expr>` |Evaluate `<expr>` and display the result|
9191
|`-L`, `--load <file>` |Load `<file>` immediately on all processors|
92-
|`-t`, `--threads {N\|auto`} |Enable N threads; `auto` currently sets N to the number of local CPU threads but this might change in the future|
92+
|`-t`, `--threads {N\|auto}` |Enable N threads; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future. Currently, `auto` uses the number of CPUs assigned to this julia process based on the OS-specific affinity assignment interface, if supported (Linux and Windows). If this is not supported (macOS) or process affinity is not configured, it uses the number of CPU threads.|
9393
|`-p`, `--procs {N\|auto}` |Integer value N launches N additional local worker processes; `auto` launches as many workers as the number of local CPU threads (logical cores)|
9494
|`--machine-file <file>` |Run processes on hosts listed in `<file>`|
9595
|`-i` |Interactive mode; REPL runs and `isinteractive()` is true|
@@ -111,6 +111,8 @@ The following is a complete list of command-line switches available when launchi
111111
|`--track-allocation={none\|user\|all}` |Count bytes allocated by each source line|
112112
|`--track-allocation` |equivalent to `--track-allocation=user`|
113113

114+
115+
114116
!!! compat "Julia 1.1"
115117
In Julia 1.0, the default `--project=@.` option did not search up from the root
116118
directory of a Git repository for the `Project.toml` file. From Julia 1.1 forward, it

doc/src/manual/multi-threading.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ The number of execution threads is controlled either by using the
1919
specified, then `-t`/`--threads` takes precedence.
2020

2121
The number of threads can either be specified as an integer (`--threads=4`) or as `auto`
22-
(`--threads=auto`), where `auto` sets the number of threads to the number of local CPU
23-
threads.
22+
(`--threads=auto`), where `auto` tries to infer a useful default number of threads to use
23+
(see [Command-line Options](@ref command-line-options) for more details).
2424

2525
!!! compat "Julia 1.5"
2626
The `-t`/`--threads` command line argument requires at least Julia 1.5.

src/jloptions.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,13 @@ static const char opts[] =
108108
" -L, --load <file> Load <file> immediately on all processors\n\n"
109109

110110
// parallel options
111-
" -t, --threads {N|auto} Enable N threads; \"auto\" currently sets N to the number of local\n"
112-
" CPU threads but this might change in the future\n"
111+
" -t, --threads {N|auto} Enable N threads; \"auto\" tries to infer a useful default number\n"
112+
" of threads to use but the exact behavior might change in the future.\n"
113+
" Currently, \"auto\" uses the number of CPUs assigned to this julia\n"
114+
" process based on the OS-specific affinity assignment interface, if\n"
115+
" supported (Linux and Windows). If this is not supported (macOS) or\n"
116+
" process affinity is not configured, it uses the number of CPU\n"
117+
" threads.\n"
113118
" -p, --procs {N|auto} Integer value N launches N additional local worker processes\n"
114119
" \"auto\" launches as many workers as the number of local CPU threads (logical cores)\n"
115120
" --machine-file <file> Run processes on hosts listed in <file>\n\n"
@@ -441,7 +446,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
441446
case 'p': // procs
442447
errno = 0;
443448
if (!strcmp(optarg,"auto")) {
444-
jl_options.nprocs = jl_cpu_threads();
449+
jl_options.nprocs = jl_effective_threads();
445450
}
446451
else {
447452
long nprocs = strtol(optarg, &endptr, 10);

src/julia_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -766,6 +766,7 @@ extern JL_DLLEXPORT ssize_t jl_tls_offset;
766766
extern JL_DLLEXPORT const int jl_tls_elf_support;
767767
void jl_init_threading(void);
768768
void jl_start_threads(void);
769+
int jl_effective_threads(void);
769770

770771
// Whether the GC is running
771772
extern char *jl_safepoint_pages;

src/sys.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,29 @@ JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT
661661
#endif
662662
}
663663

664+
// Return the number of threads to use for the "auto" thread/process count.
//
// The result is the number of entries set in the calling thread's CPU affinity
// mask, as reported by uv_thread_getaffinity(), capped at jl_cpu_threads().
// The function falls back to jl_cpu_threads() when:
//   - uv_cpumask_size() returns a negative value,
//   - jl_running_under_rr(0) is true,
//   - the mask buffer cannot be allocated, or
//   - uv_thread_getaffinity() fails (a warning is printed in this case).
int jl_effective_threads(void) JL_NOTSAFEPOINT
{
    int cpu = jl_cpu_threads();
    int masksize = uv_cpumask_size();
    if (masksize < 0 || jl_running_under_rr(0))
        return cpu;
    uv_thread_t tid = uv_thread_self();
    char *cpumask = (char *)calloc(masksize, sizeof(char));
    // Allocation failure must not crash startup: the mask would otherwise be
    // handed to uv_thread_getaffinity() as a NULL buffer.
    if (cpumask == NULL)
        return cpu;
    int err = uv_thread_getaffinity(&tid, cpumask, masksize);
    if (err) {
        free(cpumask);
        jl_safe_printf("WARNING: failed to get thread affinity (%s %d)\n", uv_err_name(err),
                       err);
        return cpu;
    }
    // Sum the per-CPU mask entries; `i` is an int so that the comparison with
    // the (signed) `masksize` does not mix signedness.
    int n = 0;
    for (int i = 0; i < masksize; i++) {
        n += cpumask[i];
    }
    free(cpumask);
    // Never report more than the machine's CPU thread count.
    return n < cpu ? n : cpu;
}
686+
664687

665688
// -- high resolution timers --
666689
// Returns time in nanosec

src/threading.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ void jl_init_threading(void)
454454
// how many threads available, usable
455455
jl_n_threads = JULIA_NUM_THREADS;
456456
if (jl_options.nthreads < 0) { // --threads=auto
457-
jl_n_threads = jl_cpu_threads();
457+
jl_n_threads = jl_effective_threads();
458458
}
459459
else if (jl_options.nthreads > 0) { // --threads=N
460460
jl_n_threads = jl_options.nthreads;
@@ -463,7 +463,7 @@ void jl_init_threading(void)
463463
if (strcmp(cp, "auto"))
464464
jl_n_threads = (uint64_t)strtol(cp, NULL, 10); // ENV[NUM_THREADS_NAME] == "N"
465465
else
466-
jl_n_threads = jl_cpu_threads(); // ENV[NUM_THREADS_NAME] == "auto"
466+
jl_n_threads = jl_effective_threads(); // ENV[NUM_THREADS_NAME] == "auto"
467467
}
468468
if (jl_n_threads <= 0)
469469
jl_n_threads = 1;

test/threads.jl

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,42 @@ else
9393
end
9494
# Note also that libuv does not support affinity in macOS and it is known to
9595
# hang in FreeBSD. So, it's tested only in Linux and Windows:
96-
if Sys.islinux() || Sys.iswindows()
97-
if Sys.CPU_THREADS > 1 && !running_under_rr()
96+
const AFFINITY_SUPPORTED = (Sys.islinux() || Sys.iswindows()) && !running_under_rr()
97+
98+
if AFFINITY_SUPPORTED
99+
if Sys.CPU_THREADS > 1
98100
@test run_with_affinity([2]) == "2"
99101
@test run_with_affinity([1, 2]) == "1,2"
100102
end
101103
end
102104

105+
# Start a child `julia` process and return the `Threads.nthreads()` it prints.
#
# The child runs with `--startup-file=no` followed by `options`, and with the
# environment variables `JULIA_EXCLUSIVE=0` and `JULIA_NUM_THREADS=auto` added.
# When `cpus` is not `nothing`, the command is wrapped with
# `setcpuaffinity(cmd, cpus)` before it is run.
function get_nthreads(options = ``; cpus = nothing)
    script = "print(Threads.nthreads())"
    julia = `$(Base.julia_cmd()) --startup-file=no $(options) -e $script`
    julia = addenv(julia, "JULIA_EXCLUSIVE" => "0", "JULIA_NUM_THREADS" => "auto")
    launcher = cpus === nothing ? julia : setcpuaffinity(julia, cpus)
    output = read(launcher, String)
    return parse(Int, output)
end
114+
115+
# Check that the "auto" thread count follows the CPU affinity given to the
# child process, and that an explicit `-t1` still yields one thread.
# The checks only run where affinity is supported and at least two CPU threads
# are available.
@testset "nthreads determined based on CPU affinity" begin
    if AFFINITY_SUPPORTED && Sys.CPU_THREADS ≥ 2
        # No affinity restriction requested by the test itself.
        @test get_nthreads() ≥ 2
        @test get_nthreads(cpus = [1]) == 1
        @test get_nthreads(cpus = [2]) == 1
        @test get_nthreads(cpus = [1, 2]) == 2
        # An explicit thread count is expected to win over "auto".
        @test get_nthreads(`-t1`, cpus = [1]) == 1
        @test get_nthreads(`-t1`, cpus = [2]) == 1
        @test get_nthreads(`-t1`, cpus = [1, 2]) == 1

        # Non-contiguous and offset CPU sets need a third CPU thread.
        if Sys.CPU_THREADS ≥ 3
            @test get_nthreads(cpus = [1, 3]) == 2
            @test get_nthreads(cpus = [2, 3]) == 2
        end
    end
end
131+
103132
# issue #34769
104133
function idle_callback(handle)
105134
idle = @Base.handle_as handle UvTestIdle

0 commit comments

Comments
 (0)