Version in base suite: 20.11.7+really20.11.4-2
Base version: slurm-wlm_20.11.7+really20.11.4-2
Target version: slurm-wlm_20.11.7+really20.11.4-2+deb11u1
Base file: /srv/ftp-master.debian.org/ftp/pool/main/s/slurm-wlm/slurm-wlm_20.11.7+really20.11.4-2.dsc
Target file: /srv/ftp-master.debian.org/policy/pool/main/s/slurm-wlm/slurm-wlm_20.11.7+really20.11.4-2+deb11u1.dsc

 changelog              |    8
 libslurm36.symbols     |    3
 patches/CVE-2022-29500 | 2152 +++++++++++++++++++++++++++++++++++++++++++++++++
 patches/CVE-2022-29501 |  152 +++
 patches/series         |    2
 5 files changed, 2317 insertions(+)

diff -Nru slurm-wlm-20.11.7+really20.11.4/debian/changelog slurm-wlm-20.11.7+really20.11.4/debian/changelog
--- slurm-wlm-20.11.7+really20.11.4/debian/changelog	2021-07-14 22:00:35.000000000 +0000
+++ slurm-wlm-20.11.7+really20.11.4/debian/changelog	2022-05-06 19:14:09.000000000 +0000
@@ -1,3 +1,11 @@
+slurm-wlm (20.11.7+really20.11.4-2+deb11u1) bullseye-security; urgency=medium
+
+  * Fix CVE-2022-29500 and CVE-2022-29501
+    (Closes: #1010634, #1010633)
+  * Update libslurm symbols file
+
+ -- Gennaro Oliva <oliva.g@na.icar.cnr.it>  Fri, 06 May 2022 21:14:09 +0200
+
 slurm-wlm (20.11.7+really20.11.4-2) unstable; urgency=medium
 
   * Fix CVE-2021-31215 (Closes: #988439)
diff -Nru slurm-wlm-20.11.7+really20.11.4/debian/libslurm36.symbols slurm-wlm-20.11.7+really20.11.4/debian/libslurm36.symbols
--- slurm-wlm-20.11.7+really20.11.4/debian/libslurm36.symbols	2021-07-14 09:49:11.000000000 +0000
+++ slurm-wlm-20.11.7+really20.11.4/debian/libslurm36.symbols	2022-05-06 19:14:09.000000000 +0000
@@ -505,6 +505,7 @@
 #MISSING: 20.11.2# slurm_get_node_features_plugins@Base 16.05.0
 slurm_get_peer_addr@Base 1.3.8
 #MISSING: 20.11.2# slurm_get_plugin_dir@Base 1.3.8
+ slurm_get_plugin_hash_enable@Base 20.11.7+really20.11.4-2+deb11u1~
 slurm_get_port@Base 20.11.2
 #MISSING: 20.11.2# slurm_get_power_parameters@Base 15.08.0
 #MISSING: 20.11.2# slurm_get_power_plugin@Base 15.08.0
@@ -809,6 +810,7 @@
 slurm_msg_recvfrom_timeout@Base 15.08.0
 slurm_msg_sendto@Base 15.08.0
 slurm_msg_sendto_timeout@Base 15.08.0
+ slurm_msg_set_r_uid@Base 20.11.7+really20.11.4-2+deb11u1~
 slurm_msg_t_copy@Base 1.3.8
 slurm_msg_t_init@Base 1.3.8
 #MISSING: 17.11.2# slurm_net_accept_stream@Base 1.3.8
@@ -955,6 +957,7 @@
 slurm_receive_msg@Base 1.3.8
 slurm_receive_msg_and_forward@Base 1.3.8
 slurm_receive_msgs@Base 1.3.8
+ slurm_receive_resp_msgs@Base 20.11.7+really20.11.4-2+deb11u1~
 slurm_reconfigure@Base 1.3.8
 slurm_recv_timeout@Base 15.08.0
 slurm_rehash_node@Base 20.02.1
diff -Nru slurm-wlm-20.11.7+really20.11.4/debian/patches/CVE-2022-29500 slurm-wlm-20.11.7+really20.11.4/debian/patches/CVE-2022-29500
--- slurm-wlm-20.11.7+really20.11.4/debian/patches/CVE-2022-29500	1970-01-01 00:00:00.000000000 +0000
+++ slurm-wlm-20.11.7+really20.11.4/debian/patches/CVE-2022-29500	2022-05-06 19:14:09.000000000 +0000
@@ -0,0 +1,2152 @@
+Description: Fix CVE-2022-29500
+ Prevent credential abuse.
+Author: Dominik Bartkiewicz +Last-Update: 2022-05-04 + +--- a/src/api/config_info.c ++++ b/src/api/config_info.c +@@ -1914,6 +1914,7 @@ + } + req_msg.msg_type = REQUEST_DAEMON_STATUS; + req_msg.data = NULL; ++ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); + + rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0); + +--- a/src/api/job_info.c ++++ b/src/api/job_info.c +@@ -1511,6 +1511,7 @@ + req.job_pid = job_pid; + req_msg.msg_type = REQUEST_JOB_ID; + req_msg.data = &req; ++ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); + + rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0); + +@@ -1878,6 +1879,7 @@ + + req_msg.msg_type = REQUEST_NETWORK_CALLERID; + req_msg.data = &req; ++ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); + + if (slurm_send_recv_node_msg(&req_msg, &resp_msg, 0) < 0) + return SLURM_ERROR; +--- a/src/api/job_step_info.c ++++ b/src/api/job_step_info.c +@@ -584,6 +584,7 @@ + __func__, step_id, node_list); + + slurm_msg_t_init(&req_msg); ++ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); + + memcpy(&req, step_id, sizeof(req)); + memcpy(&resp_out->step_id, step_id, sizeof(resp_out->step_id)); +@@ -695,6 +696,7 @@ + __func__, step_id, node_list); + + slurm_msg_t_init(&req_msg); ++ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); + + memcpy(&req, step_id, sizeof(req)); + memcpy(&resp_out->step_id, step_id, sizeof(resp_out->step_id)); +--- a/src/api/node_info.c ++++ b/src/api/node_info.c +@@ -862,6 +862,7 @@ + req.delta = delta; + req_msg.msg_type = REQUEST_ACCT_GATHER_ENERGY; + req_msg.data = &req; ++ slurm_msg_set_r_uid(&req_msg, SLURM_AUTH_UID_ANY); + + rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0); + +--- a/src/api/pmi_server.c ++++ b/src/api/pmi_server.c +@@ -142,6 +142,7 @@ + slurm_msg_t msg_send; + + slurm_msg_t_init(&msg_send); ++ slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY); + + debug2("KVS_Barrier msg to %s:%hu", + msg_arg_ptr->bar_ptr->hostname, +--- a/src/api/reconfigure.c ++++ b/src/api/reconfigure.c +@@ -157,6 +157,7 @@ + slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR); + } + ++ slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id); + if (slurm_send_node_msg(fd, req) < 0) { + close(fd); + slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SEND_ERROR); +--- a/src/api/signal.c ++++ b/src/api/signal.c +@@ -59,6 +59,7 @@ + slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t)); + + slurm_msg_t_init(msg); ++ slurm_msg_set_r_uid(msg, SLURM_AUTH_UID_ANY); + msg->msg_type = type; + msg->data = data; + +@@ -102,6 +103,7 @@ + rpc.flags = KILL_JOB_BATCH; + + slurm_msg_t_init(&msg); ++ slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id); + msg.msg_type = REQUEST_SIGNAL_TASKS; + msg.data = &rpc; + if (slurm_conf_get_addr(name, &msg.address, msg.flags) +@@ -160,6 +162,7 @@ + + slurm_msg_t_init(&msg); + msg.msg_type = REQUEST_TERMINATE_TASKS; ++ slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id); + msg.data = &rpc; + + if (slurm_conf_get_addr(name, &msg.address, msg.flags) +--- a/src/api/slurm_pmi.c ++++ b/src/api/slurm_pmi.c +@@ -180,6 +180,7 @@ + _set_pmi_time(); + + slurm_msg_t_init(&msg_send); ++ slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY); + msg_send.address = srun_addr; + msg_send.msg_type = PMI_KVS_PUT_REQ; + msg_send.data = (void *) kvs_set_ptr; +@@ -261,6 +262,7 @@ + data.port = slurm_get_port(&slurm_addr); + data.hostname = hostname; + slurm_msg_t_init(&msg_send); ++ slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY); + slurm_msg_t_init(&msg_rcv); + msg_send.address = srun_addr; + msg_send.msg_type = PMI_KVS_GET_REQ; +@@ -345,6 
+347,7 @@ + if (kvs_set_ptr->kvs_host_ptr[i].port == 0) + continue; /* empty */ + slurm_msg_t_init(&msg_send); ++ slurm_msg_set_r_uid(&msg_send, SLURM_AUTH_UID_ANY); + msg_send.msg_type = PMI_KVS_GET_RESP; + msg_send.data = (void *) kvs_set_ptr; + slurm_set_addr(&msg_send.address, +--- a/src/api/step_launch.c ++++ b/src/api/step_launch.c +@@ -902,6 +902,7 @@ + hostlist_destroy(hl); + + RESEND: slurm_msg_t_init(&req); ++ slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY); + req.msg_type = REQUEST_SIGNAL_TASKS; + req.data = &msg; + +@@ -1720,6 +1721,7 @@ + } + + slurm_msg_t_init(&msg); ++ slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY); + msg.msg_type = REQUEST_LAUNCH_TASKS; + msg.data = launch_msg; + +--- a/src/bcast/file_bcast.c ++++ b/src/bcast/file_bcast.c +@@ -168,6 +168,7 @@ + slurm_msg_t msg; + + slurm_msg_t_init(&msg); ++ slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY); + msg.data = bcast_msg; + msg.flags = USE_BCAST_NETWORK; + msg.forward.tree_width = params->fanout; +--- a/src/common/forward.c ++++ b/src/common/forward.c +@@ -243,7 +243,7 @@ + /* steps, fwd_msg->timeout); */ + } + +- ret_list = slurm_receive_msgs(fd, steps, fwd_msg->timeout); ++ ret_list = slurm_receive_resp_msgs(fd, steps, fwd_msg->timeout); + /* info("sent %d forwards got %d back", */ + /* fwd_msg->header.forward.cnt, list_count(ret_list)); */ + +@@ -352,6 +352,9 @@ + send_msg.flags = fwd_tree->orig_msg->flags; + send_msg.data = fwd_tree->orig_msg->data; + send_msg.protocol_version = fwd_tree->orig_msg->protocol_version; ++ if (fwd_tree->orig_msg->restrict_uid_set) ++ slurm_msg_set_r_uid(&send_msg, ++ fwd_tree->orig_msg->restrict_uid); + + /* repeat until we are sure the message was sent */ + while ((name = hostlist_shift(fwd_tree->tree_hl))) { +--- a/src/common/slurm_auth.c ++++ b/src/common/slurm_auth.c +@@ -62,12 +62,16 @@ + typedef struct { + uint32_t (*plugin_id); + char (*plugin_type); +- void * (*create) (char *auth_info); ++ bool (*hash_enable); ++ void * (*create) (char *auth_info, uid_t r_uid, ++ void *data, int dlen); + int (*destroy) (void *cred); + int (*verify) (void *cred, char *auth_info); + uid_t (*get_uid) (void *cred); + gid_t (*get_gid) (void *cred); + char * (*get_host) (void *cred); ++ int (*get_data) (void *cred, char **data, ++ uint32_t *len); + int (*pack) (void *cred, Buf buf, + uint16_t protocol_version); + void * (*unpack) (Buf buf, uint16_t protocol_version); +@@ -82,12 +86,14 @@ + static const char *syms[] = { + "plugin_id", + "plugin_type", ++ "hash_enable", + "slurm_auth_create", + "slurm_auth_destroy", + "slurm_auth_verify", + "slurm_auth_get_uid", + "slurm_auth_get_gid", + "slurm_auth_get_host", ++ "auth_p_get_data", + "slurm_auth_pack", + "slurm_auth_unpack", + "slurm_auth_thread_config", +@@ -95,6 +101,17 @@ + "slurm_auth_token_generate", + }; + ++typedef struct { ++ int plugin_id; ++ char *type; ++} auth_plugin_types_t; ++ ++auth_plugin_types_t auth_plugin_types[] = { ++ { AUTH_PLUGIN_NONE, "auth/none" }, ++ { AUTH_PLUGIN_MUNGE, "auth/munge" }, ++ { AUTH_PLUGIN_JWT, "auth/jwt" }, ++}; ++ + /* + * A global authentication context. "Global" in the sense that there's + * only one, with static bindings. We don't export it. 
+@@ -104,6 +121,15 @@ + static int g_context_num = -1; + static pthread_mutex_t context_lock = PTHREAD_MUTEX_INITIALIZER; + ++extern bool slurm_get_plugin_hash_enable(int index) ++{ ++ if (slurm_auth_init(NULL) < 0) ++ return true; ++ ++ return *(ops[index].hash_enable); ++ ++} ++ + extern int slurm_auth_init(char *auth_type) + { + int retval = SLURM_SUCCESS; +@@ -235,14 +261,15 @@ + * the API function dispatcher. + */ + +-void *g_slurm_auth_create(int index, char *auth_info) ++void *g_slurm_auth_create(int index, char *auth_info, uid_t r_uid, ++ void *data, int dlen) + { + cred_wrapper_t *cred; + + if (slurm_auth_init(NULL) < 0) + return NULL; + +- cred = (*(ops[index].create))(auth_info); ++ cred = (*(ops[index].create))(auth_info, r_uid, data, dlen); + if (cred) + cred->index = index; + return cred; +@@ -298,6 +325,16 @@ + return (*(ops[wrap->index].get_host))(cred); + } + ++int auth_g_get_data(void *cred, char **data, uint32_t *len) ++{ ++ cred_wrapper_t *wrap = (cred_wrapper_t *) cred; ++ ++ if (!wrap || slurm_auth_init(NULL) < 0) ++ return SLURM_ERROR; ++ ++ return (*(ops[wrap->index].get_data))(cred, data, len); ++} ++ + int g_slurm_auth_pack(void *cred, Buf buf, uint16_t protocol_version) + { + cred_wrapper_t *wrap = (cred_wrapper_t *) cred; +--- a/src/common/slurm_auth.h ++++ b/src/common/slurm_auth.h +@@ -64,6 +64,12 @@ + #define SLURM_AUTH_NOBODY 99 + + /* ++ * This should be equal to MUNGE_UID_ANY ++ * do not restrict decode via uid ++ */ ++#define SLURM_AUTH_UID_ANY -1 ++ ++/* + * Default auth_index value, corresponds to the primary AuthType used. + */ + #define AUTH_DEFAULT_INDEX 0 +@@ -87,14 +93,22 @@ + extern int slurm_auth_index(void *cred); + + /* ++ * Check if plugin type corresponding to the authentication ++ * plugin index supports hash. ++ */ ++extern bool slurm_get_plugin_hash_enable(int index); ++ ++/* + * Static bindings for the global authentication context. 
+ */ +-extern void *g_slurm_auth_create(int index, char *auth_info); ++extern void *g_slurm_auth_create(int index, char *auth_info, uid_t r_uid, ++ void *data, int dlen); + extern int g_slurm_auth_destroy(void *cred); + extern int g_slurm_auth_verify(void *cred, char *auth_info); + extern uid_t g_slurm_auth_get_uid(void *cred); + extern gid_t g_slurm_auth_get_gid(void *cred); + extern char *g_slurm_auth_get_host(void *cred); ++extern int auth_g_get_data(void *cred, char **data, uint32_t *len); + extern int g_slurm_auth_pack(void *cred, Buf buf, uint16_t protocol_version); + extern void *g_slurm_auth_unpack(Buf buf, uint16_t protocol_version); + +--- a/src/common/slurm_persist_conn.c ++++ b/src/common/slurm_persist_conn.c +@@ -612,6 +612,7 @@ + req_msg.flags |= SLURM_GLOBAL_AUTH_KEY; + if (persist_conn->flags & PERSIST_FLAG_DBD) + req_msg.flags |= SLURMDBD_CONNECTION; ++ slurm_msg_set_r_uid(&req_msg, persist_conn->r_uid); + + memset(&req, 0, sizeof(persist_init_req_msg_t)); + req.cluster_name = persist_conn->cluster_name; +--- a/src/common/slurm_persist_conn.h ++++ b/src/common/slurm_persist_conn.h +@@ -76,6 +76,7 @@ + uint16_t flags; + bool inited; + persist_conn_type_t persist_type; ++ uid_t r_uid; + char *rem_host; + uint16_t rem_port; + time_t *shutdown; +--- a/src/common/slurm_protocol_api.c ++++ b/src/common/slurm_protocol_api.c +@@ -173,6 +173,36 @@ + return proto_conf; + } + ++static int _check_hash(buf_t *buffer, header_t *header, slurm_msg_t *msg, ++ void *cred) ++{ ++ char *cred_hash = NULL; ++ uint32_t cred_hash_len = 0; ++ int rc; ++ static time_t config_update = (time_t) -1; ++ static bool block_null_hash = true; ++ ++ if (config_update != slurm_conf.last_update) { ++ block_null_hash = (xstrcasestr(slurm_conf.comm_params, ++ "block_null_hash")); ++ config_update = slurm_conf.last_update; ++ } ++ ++ rc = auth_g_get_data(cred, &cred_hash, &cred_hash_len); ++ ++ if (cred_hash || cred_hash_len) { ++ if (cred_hash_len != 3 || cred_hash[0] != 1 || ++ memcmp(cred_hash + 1, ++ &msg->msg_type, sizeof(msg->msg_type))) ++ rc = SLURM_ERROR; ++ } else if (block_null_hash && ++ slurm_get_plugin_hash_enable(msg->auth_index)) ++ rc = SLURM_ERROR; ++ ++ xfree(cred_hash); ++ return rc; ++} ++ + static int _get_tres_id(char *type, char *name) + { + slurmdb_tres_rec_t tres_rec; +@@ -985,6 +1015,7 @@ + msg->body_offset = get_buf_offset(buffer); + + if ((header.body_length > remaining_buf(buffer)) || ++ _check_hash(buffer, &header, msg, auth_cred) || + (unpack_msg(msg, buffer) != SLURM_SUCCESS)) { + rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET; + (void) g_slurm_auth_destroy(auth_cred); +@@ -1087,6 +1118,8 @@ + */ + if (slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) { + rc = errno; ++ if (!rc) ++ rc = SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR; + goto endit; + } + +@@ -1247,6 +1280,7 @@ + msg.flags = header.flags; + + if ((header.body_length > remaining_buf(buffer)) || ++ _check_hash(buffer, &header, &msg, auth_cred) || + (unpack_msg(&msg, buffer) != SLURM_SUCCESS)) { + (void) g_slurm_auth_destroy(auth_cred); + free_buf(buffer); +@@ -1288,6 +1322,155 @@ + + } + ++List slurm_receive_resp_msgs(int fd, int steps, int timeout) ++{ ++ char *buf = NULL; ++ size_t buflen = 0; ++ header_t header; ++ int rc; ++ void *auth_cred = NULL; ++ slurm_msg_t msg; ++ buf_t *buffer; ++ ret_data_info_t *ret_data_info = NULL; ++ List ret_list = NULL; ++ int orig_timeout = timeout; ++ ++ xassert(fd >= 0); ++ ++ slurm_msg_t_init(&msg); ++ msg.conn_fd = fd; ++ ++ if (timeout <= 0) { ++ /* convert secs to msec */ ++ 
timeout = slurm_conf.msg_timeout * 1000; ++ orig_timeout = timeout; ++ } ++ if (steps) { ++ if (message_timeout < 0) ++ message_timeout = slurm_conf.msg_timeout * 1000; ++ orig_timeout = (timeout - ++ (message_timeout*(steps-1)))/steps; ++ steps--; ++ } ++ ++ log_flag(NET, "%s: orig_timeout was %d we have %d steps and a timeout of %d", ++ __func__, orig_timeout, steps, timeout); ++ /* we compare to the orig_timeout here because that is really ++ * what we are going to wait for each step ++ */ ++ if (orig_timeout >= (slurm_conf.msg_timeout * 10000)) { ++ log_flag(NET, "%s: Sending a message with timeout's greater than %d seconds, requested timeout is %d seconds", ++ __func__, (slurm_conf.msg_timeout * 10), ++ (timeout/1000)); ++ } else if (orig_timeout < 1000) { ++ log_flag(NET, "%s: Sending a message with a very short timeout of %d milliseconds each step in the tree has %d milliseconds", ++ __func__, timeout, orig_timeout); ++ } ++ ++ ++ /* ++ * Receive a msg. slurm_msg_recvfrom() will read the message ++ * length and allocate space on the heap for a buffer containing ++ * the message. ++ */ ++ if (slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) { ++ forward_init(&header.forward); ++ rc = errno; ++ goto total_return; ++ } ++ ++ log_flag_hex(NET_RAW, buf, buflen, "%s: read", __func__); ++ buffer = create_buf(buf, buflen); ++ ++ if (unpack_header(&header, buffer) == SLURM_ERROR) { ++ free_buf(buffer); ++ rc = SLURM_COMMUNICATIONS_RECEIVE_ERROR; ++ goto total_return; ++ } ++ ++ if (check_header_version(&header) < 0) { ++ slurm_addr_t resp_addr; ++ if (!slurm_get_peer_addr(fd, &resp_addr)) { ++ error("%s: Invalid Protocol Version %u from at %pA", ++ __func__, header.version, &resp_addr); ++ } else { ++ error("%s: Invalid Protocol Version %u from problem connection: %m", ++ __func__, header.version); ++ } ++ ++ free_buf(buffer); ++ rc = SLURM_PROTOCOL_VERSION_ERROR; ++ goto total_return; ++ } ++ //info("ret_cnt = %d",header.ret_cnt); ++ if (header.ret_cnt > 0) { ++ if (header.ret_list) ++ ret_list = header.ret_list; ++ else ++ ret_list = list_create(destroy_data_info); ++ header.ret_cnt = 0; ++ header.ret_list = NULL; ++ } ++ ++ /* Forward message to other nodes */ ++ if (header.forward.cnt > 0) { ++ error("%s: We need to forward this to other nodes use slurm_receive_msg_and_forward instead", ++ __func__); ++ } ++ ++ if (!(auth_cred = g_slurm_auth_unpack(buffer, header.version))) { ++ error("%s: auth_g_unpack: %m", __func__); ++ free_buf(buffer); ++ rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET; ++ goto total_return; ++ } ++ g_slurm_auth_destroy(auth_cred); ++ /* ++ * Unpack message body ++ */ ++ msg.protocol_version = header.version; ++ msg.msg_type = header.msg_type; ++ msg.flags = header.flags; ++ ++ if ((header.body_length > remaining_buf(buffer)) || ++ (unpack_msg(&msg, buffer) != SLURM_SUCCESS)) { ++ free_buf(buffer); ++ rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET; ++ goto total_return; ++ } ++ free_buf(buffer); ++ rc = SLURM_SUCCESS; ++ ++total_return: ++ destroy_forward(&header.forward); ++ ++ if (rc != SLURM_SUCCESS) { ++ if (ret_list) { ++ ret_data_info = xmalloc(sizeof(ret_data_info_t)); ++ ret_data_info->err = rc; ++ ret_data_info->type = RESPONSE_FORWARD_FAILED; ++ ret_data_info->data = NULL; ++ list_push(ret_list, ret_data_info); ++ } ++ ++ error("%s: failed: %s", ++ __func__, slurm_strerror(rc)); ++ usleep(10000); /* Discourage brute force attack */ ++ } else { ++ if (!ret_list) ++ ret_list = list_create(destroy_data_info); ++ ret_data_info = xmalloc(sizeof(ret_data_info_t)); 
++ ret_data_info->err = rc; ++ ret_data_info->node_name = NULL; ++ ret_data_info->type = msg.msg_type; ++ ret_data_info->data = msg.data; ++ list_push(ret_list, ret_data_info); ++ } ++ ++ errno = rc; ++ return ret_list; ++ ++} + /* try to determine the UID associated with a message with different + * message header version, return -1 if we can't tell */ + static int _unpack_msg_uid(Buf buffer, uint16_t protocol_version) +@@ -1465,6 +1648,7 @@ + msg->flags = header.flags; + + if ( (header.body_length > remaining_buf(buffer)) || ++ _check_hash(buffer, &header, msg, auth_cred) || + (unpack_msg(msg, buffer) != SLURM_SUCCESS) ) { + (void) g_slurm_auth_destroy(auth_cred); + free_buf(buffer); +@@ -1532,6 +1716,7 @@ + int rc; + void * auth_cred; + time_t start_time = time(NULL); ++ unsigned char auth_payload[3] = { 1 }; /* uint8_t + uint16_t (msg_type) */ + + if (msg->conn) { + persist_msg_t persist_msg; +@@ -1564,6 +1749,9 @@ + return rc; + } + ++ if (!msg->restrict_uid_set) ++ fatal("%s: restrict_uid is not set", __func__); ++ memcpy(auth_payload + 1, &msg->msg_type, sizeof(msg->msg_type)); + /* + * Initialize header with Auth credential and message type. + * We get the credential now rather than later so the work can +@@ -1573,10 +1761,14 @@ + */ + if (msg->flags & SLURM_GLOBAL_AUTH_KEY) { + auth_cred = g_slurm_auth_create(msg->auth_index, +- _global_auth_key()); ++ _global_auth_key(), ++ msg->restrict_uid, auth_payload, ++ sizeof(auth_payload)); + } else { + auth_cred = g_slurm_auth_create(msg->auth_index, +- slurm_conf.authinfo); ++ slurm_conf.authinfo, ++ msg->restrict_uid, auth_payload, ++ sizeof(auth_payload)); + } + + if (msg->forward.init != FORWARD_INIT) { +@@ -1593,10 +1785,16 @@ + (void) g_slurm_auth_destroy(auth_cred); + if (msg->flags & SLURM_GLOBAL_AUTH_KEY) { + auth_cred = g_slurm_auth_create(msg->auth_index, +- _global_auth_key()); ++ _global_auth_key(), ++ msg->restrict_uid, ++ auth_payload, ++ sizeof(auth_payload)); + } else { + auth_cred = g_slurm_auth_create(msg->auth_index, +- slurm_conf.authinfo); ++ slurm_conf.authinfo, ++ msg->restrict_uid, ++ auth_payload, ++ sizeof(auth_payload)); + } + } + if (auth_cred == NULL) { +@@ -1849,6 +2047,24 @@ + resp_msg->protocol_version = msg->protocol_version; + resp_msg->ret_list = msg->ret_list; + resp_msg->orig_addr = msg->orig_addr; ++ /* ++ * Extra sanity check. This should always be set. But if for some ++ * reason it isn't, restrict the decode to avoid leaking an ++ * unrestricted authentication token. ++ * ++ * Implicitly trust communications initiated by SlurmUser and ++ * SlurmdUser. In future releases this won't matter - there's ++ * no point packing an auth token on the reply as it isn't checked, ++ * but we're stuck doing that on older protocol versions for ++ * backwards-compatibility. 
++ */ ++ if (!msg->auth_uid_set) ++ slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_NOBODY); ++ else if ((msg->auth_uid != slurm_conf.slurm_user_id) && ++ (msg->auth_uid != slurm_conf.slurmd_user_id)) ++ slurm_msg_set_r_uid(resp_msg, msg->auth_uid); ++ else ++ slurm_msg_set_r_uid(resp_msg, SLURM_AUTH_UID_ANY); + } + + static void _rc_msg_setup(slurm_msg_t *msg, slurm_msg_t *resp_msg, +@@ -2130,6 +2346,7 @@ + forward_init(&request_msg->forward); + request_msg->ret_list = NULL; + request_msg->forward_struct = NULL; ++ slurm_msg_set_r_uid(request_msg, SLURM_AUTH_UID_ANY); + + tryagain: + retry = 1; +@@ -2257,6 +2474,8 @@ + goto cleanup; + } + ++ slurm_msg_set_r_uid(req, slurm_conf.slurm_user_id); ++ + if ((rc = slurm_send_node_msg(fd, req)) < 0) { + rc = SLURM_ERROR; + } else { +@@ -2638,6 +2857,12 @@ + } + } + ++extern void slurm_msg_set_r_uid(slurm_msg_t *msg, uid_t r_uid) ++{ ++ msg->restrict_uid = r_uid; ++ msg->restrict_uid_set = true; ++} ++ + extern char *nodelist_nth_host(const char *nodelist, int inx) + { + hostlist_t hl = hostlist_create(nodelist); +@@ -2819,6 +3044,7 @@ + req.len = len; + req.data = (char *)data; + ++ slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY); + msg.msg_type = REQUEST_FORWARD_DATA; + msg.data = &req; + +--- a/src/common/slurm_protocol_api.h ++++ b/src/common/slurm_protocol_api.h +@@ -286,6 +286,7 @@ + * errno set. + */ + List slurm_receive_msgs(int fd, int steps, int timeout); ++List slurm_receive_resp_msgs(int fd, int steps, int timeout); + + /* + * Receive a slurm message on the open slurm descriptor "fd". This will also +@@ -600,6 +601,8 @@ + extern void slurm_free_msg_members(slurm_msg_t *msg); + extern void slurm_free_msg(slurm_msg_t * msg); + ++extern void slurm_msg_set_r_uid(slurm_msg_t *msg, uid_t r_uid); ++ + /* must free this memory with free not xfree */ + extern char *nodelist_nth_host(const char *nodelist, int inx); + extern int nodelist_find(const char *nodelist, const char *name); +--- a/src/common/slurm_protocol_defs.c ++++ b/src/common/slurm_protocol_defs.c +@@ -168,6 +168,8 @@ + #endif + + dest->orig_addr.ss_family = AF_UNSPEC; ++ if (src->auth_uid_set) ++ slurm_msg_set_r_uid(dest, src->auth_uid); + } + + /* here to add \\ to all \" in a string this needs to be xfreed later */ +--- a/src/common/slurm_protocol_defs.h ++++ b/src/common/slurm_protocol_defs.h +@@ -59,6 +59,7 @@ + #include "src/common/job_options.h" + #include "src/common/list.h" + #include "src/common/macros.h" ++#include "src/common/slurm_auth.h" + #include "src/common/slurm_cred.h" + #include "src/common/slurm_protocol_common.h" + #include "src/common/slurm_persist_conn.h" +@@ -516,6 +517,8 @@ + * slurm_msg_t_init() was not called since + * auth_uid would be root. + */ ++ uid_t restrict_uid; ++ bool restrict_uid_set; + uint32_t body_offset; /* DON'T PACK: offset in buffer where body part of + buffer starts. */ + Buf buffer; /* DON't PACK! ptr to buffer that msg was unpacked from. 
*/ +--- a/src/common/slurmdb_defs.c ++++ b/src/common/slurmdb_defs.c +@@ -3032,6 +3032,7 @@ + slurm_set_addr(&req.address, port, host); + + req.protocol_version = rpc_version; ++ slurm_msg_set_r_uid(&req, SLURM_AUTH_UID_ANY); + + req.msg_type = ACCOUNTING_UPDATE_MSG; + if (slurmdbd_conf) +--- a/src/common/stepd_api.c ++++ b/src/common/stepd_api.c +@@ -417,7 +417,7 @@ + int + stepd_attach(int fd, uint16_t protocol_version, + slurm_addr_t *ioaddr, slurm_addr_t *respaddr, +- void *job_cred_sig, reattach_tasks_response_msg_t *resp) ++ void *job_cred_sig, uid_t uid, reattach_tasks_response_msg_t *resp) + { + int req = REQUEST_ATTACH; + int rc = SLURM_SUCCESS; +@@ -427,6 +427,7 @@ + safe_write(fd, ioaddr, sizeof(slurm_addr_t)); + safe_write(fd, respaddr, sizeof(slurm_addr_t)); + safe_write(fd, job_cred_sig, SLURM_IO_KEY_SIZE); ++ safe_write(fd, &uid, sizeof(uid_t)); + safe_write(fd, &protocol_version, sizeof(uint16_t)); + } else + goto rwfail; +--- a/src/common/stepd_api.h ++++ b/src/common/stepd_api.h +@@ -184,7 +184,8 @@ + */ + int stepd_attach(int fd, uint16_t protocol_version, + slurm_addr_t *ioaddr, slurm_addr_t *respaddr, +- void *job_cred_sig, reattach_tasks_response_msg_t *resp); ++ void *job_cred_sig, uid_t uid, ++ reattach_tasks_response_msg_t *resp); + + /* + * Scan for available running slurm step daemons by checking +--- a/src/plugins/accounting_storage/common/common_as.c ++++ b/src/plugins/accounting_storage/common/common_as.c +@@ -397,6 +397,7 @@ + out_msg.msg_type = ACCOUNTING_FIRST_REG; + out_msg.flags = SLURM_GLOBAL_AUTH_KEY; + out_msg.data = &update; ++ slurm_msg_set_r_uid(&out_msg, SLURM_AUTH_UID_ANY); + slurm_send_node_msg(fd, &out_msg); + /* We probably need to add matching recv_msg function + * for an arbitray fd or should these be fire +--- a/src/plugins/accounting_storage/slurmdbd/dbd_conn.c ++++ b/src/plugins/accounting_storage/slurmdbd/dbd_conn.c +@@ -94,6 +94,8 @@ + else + pc->flags &= (~PERSIST_FLAG_SUPPRESS_ERR); + ++ pc->r_uid = SLURM_AUTH_UID_ANY; ++ + if (((rc = slurm_persist_conn_open(pc)) != SLURM_SUCCESS) && + backup_host) { + xfree(pc->rem_host); +--- a/src/plugins/auth/jwt/auth_jwt.c ++++ b/src/plugins/auth/jwt/auth_jwt.c +@@ -75,6 +75,7 @@ + const char plugin_type[] = "auth/jwt"; + const uint32_t plugin_id = AUTH_PLUGIN_JWT; + const uint32_t plugin_version = SLURM_VERSION_NUMBER; ++bool hash_enable = false; + + typedef struct { + int index; /* MUST ALWAYS BE FIRST. DO NOT PACK. */ +@@ -187,7 +188,8 @@ + return SLURM_SUCCESS; + } + +-auth_token_t *slurm_auth_create(char *auth_info) ++auth_token_t *slurm_auth_create(char *auth_info, uid_t r_uid, ++ void *data, int dlen) + { + return xmalloc(sizeof(auth_token_t)); + } +@@ -344,6 +346,18 @@ + return NULL; + } + ++int auth_p_get_data(auth_token_t *cred, char **data, uint32_t *len) ++{ ++ if (cred == NULL) { ++ slurm_seterrno(ESLURM_AUTH_BADARG); ++ return SLURM_ERROR; ++ } ++ ++ *data = NULL; ++ *len = 0; ++ return SLURM_SUCCESS; ++} ++ + int slurm_auth_pack(auth_token_t *cred, Buf buf, uint16_t protocol_version) + { + char *pack_this = (thread_token) ? thread_token : token; +--- a/src/plugins/auth/munge/auth_munge.c ++++ b/src/plugins/auth/munge/auth_munge.c +@@ -85,6 +85,7 @@ + const char plugin_type[] = "auth/munge"; + const uint32_t plugin_id = AUTH_PLUGIN_MUNGE; + const uint32_t plugin_version = SLURM_VERSION_NUMBER; ++bool hash_enable = true; + + static int bad_cred_test = -1; + +@@ -100,6 +101,8 @@ + bool verified; /* true if this cred has been verified */ + uid_t uid; /* UID. 
valid only if verified == true */ + gid_t gid; /* GID. valid only if verified == true */ ++ void *data; /* payload data */ ++ int dlen; /* payload data length */ + } slurm_auth_credential_t; + + /* Static prototypes */ +@@ -128,7 +131,8 @@ + * allocate a credential. Whether the credential is populated with useful + * data at this time is implementation-dependent. + */ +-slurm_auth_credential_t *slurm_auth_create(char *opts) ++slurm_auth_credential_t *slurm_auth_create(char *opts, uid_t r_uid, ++ void *data, int dlen) + { + int rc, retry = RETRY_COUNT, auth_ttl; + slurm_auth_credential_t *cred = NULL; +@@ -153,6 +157,13 @@ + } + } + ++ rc = munge_ctx_set(ctx, MUNGE_OPT_UID_RESTRICTION, r_uid); ++ if (rc != EMUNGE_SUCCESS) { ++ error("munge_ctx_set failure"); ++ munge_ctx_destroy(ctx); ++ return NULL; ++ } ++ + auth_ttl = slurm_get_auth_ttl(); + if (auth_ttl) + (void) munge_ctx_set(ctx, MUNGE_OPT_TTL, auth_ttl); +@@ -161,6 +172,8 @@ + cred->magic = MUNGE_MAGIC; + cred->verified = false; + cred->m_str = NULL; ++ cred->data = NULL; ++ cred->dlen = 0; + + /* + * Temporarily block SIGALARM to avoid misleading +@@ -171,7 +184,7 @@ + ohandler = xsignal(SIGALRM, (SigFunc *)SIG_BLOCK); + + again: +- err = munge_encode(&cred->m_str, ctx, NULL, 0); ++ err = munge_encode(&cred->m_str, ctx, data, dlen); + if (err != EMUNGE_SUCCESS) { + if ((err == EMUNGE_SOCKET) && retry--) { + debug("Munge encode failed: %s (retrying ...)", +@@ -212,6 +225,8 @@ + /* Note: Munge cred string not encoded with xmalloc() */ + if (cred->m_str) + free(cred->m_str); ++ if (cred->data) ++ free(cred->data); + + xfree(cred); + return SLURM_SUCCESS; +@@ -336,6 +351,34 @@ + } + + /* ++ * auth_p_verify() must be called first. ++ */ ++int auth_p_get_data(slurm_auth_credential_t *cred, char **data, uint32_t *len) ++{ ++ if (!cred || !cred->verified) { ++ /* ++ * This xassert will trigger on a development build if ++ * the calling path did not verify the credential first. ++ */ ++ xassert(!cred); ++ slurm_seterrno(ESLURM_AUTH_BADARG); ++ return SLURM_ERROR; ++ } ++ ++ xassert(cred->magic == MUNGE_MAGIC); ++ ++ if (cred->data && cred->dlen) { ++ *data = xmalloc(cred->dlen); ++ memcpy(*data, cred->data, cred->dlen); ++ *len = cred->dlen; ++ } else { ++ *data = NULL; ++ *len = 0; ++ } ++ return SLURM_SUCCESS; ++} ++ ++/* + * Marshall a credential for transmission over the network, according to + * Slurm's marshalling protocol. + */ +@@ -426,7 +469,7 @@ + } + + again: +- err = munge_decode(c->m_str, ctx, NULL, NULL, &c->uid, &c->gid); ++ err = munge_decode(c->m_str, ctx, &c->data, &c->dlen, &c->uid, &c->gid); + if (err != EMUNGE_SUCCESS) { + if ((err == EMUNGE_SOCKET) && retry--) { + debug("Munge decode failed: %s (retrying ...)", +--- a/src/plugins/auth/none/auth_none.c ++++ b/src/plugins/auth/none/auth_none.c +@@ -75,6 +75,7 @@ + const char plugin_type[] = "auth/none"; + const uint32_t plugin_id = AUTH_PLUGIN_NONE; + const uint32_t plugin_version = SLURM_VERSION_NUMBER; ++bool hash_enable = false; + + /* + * An opaque type representing authentication credentials. This type can be +@@ -138,7 +139,8 @@ + * Allocate and initializes a credential. This function should return + * NULL if it cannot allocate a credential. 
+ */ +-slurm_auth_credential_t *slurm_auth_create(char *auth_info) ++slurm_auth_credential_t *slurm_auth_create(char *auth_info, uid_t r_uid, ++ void *data, int dlen) + { + slurm_auth_credential_t *cred = xmalloc(sizeof(*cred)); + +@@ -217,6 +219,19 @@ + return xstrdup(cred->hostname); + } + ++int auth_p_get_data(slurm_auth_credential_t *cred, char **data, uint32_t *len) ++{ ++ if (!cred) { ++ slurm_seterrno(ESLURM_AUTH_BADARG); ++ return SLURM_ERROR; ++ } ++ ++ *data = NULL; ++ *len = 0; ++ ++ return SLURM_SUCCESS; ++} ++ + /* + * Marshall a credential for transmission over the network, according to + * Slurm's marshalling protocol. +--- a/src/plugins/mpi/pmi2/setup.c ++++ b/src/plugins/mpi/pmi2/setup.c +@@ -111,6 +111,8 @@ + else + job_info.step_id.job_id = job->step_id.job_id; + ++ job_info.uid = job->uid; ++ + if (job->het_job_offset != NO_VAL) { + job_info.step_id.step_id = job->step_id.step_id; + job_info.step_id.step_het_comp = job->step_id.step_het_comp; +--- a/src/plugins/mpi/pmi2/setup.h ++++ b/src/plugins/mpi/pmi2/setup.h +@@ -58,6 +58,7 @@ + + typedef struct pmi2_job_info { + slurm_step_id_t step_id; /* Current step id struct */ ++ uid_t uid; /* user id for job */ + uint32_t nnodes; /* number of nodes in current job step */ + uint32_t nodeid; /* relative position of this node in job */ + uint32_t ntasks; /* total number of tasks in current job */ +--- a/src/plugins/mpi/pmi2/spawn.c ++++ b/src/plugins/mpi/pmi2/spawn.c +@@ -150,8 +150,8 @@ + spawn_subcmd_t *subcmd; + void *auth_cred; + +- auth_cred = g_slurm_auth_create(AUTH_DEFAULT_INDEX, +- slurm_conf.authinfo); ++ auth_cred = g_slurm_auth_create(AUTH_DEFAULT_INDEX, slurm_conf.authinfo, ++ job_info.uid, NULL, 0); + if (auth_cred == NULL) { + error("authentication: %m"); + return; +@@ -210,6 +210,7 @@ + } + if (g_slurm_auth_verify(auth_cred, slurm_conf.authinfo)) { + error("authentication: %m"); ++ g_slurm_auth_destroy(auth_cred); + return SLURM_ERROR; + } + auth_uid = g_slurm_auth_get_uid(auth_cred); +--- a/src/plugins/mpi/pmix/pmixp_dconn.c ++++ b/src/plugins/mpi/pmix/pmixp_dconn.c +@@ -79,6 +79,7 @@ + _pmixp_dconn_conns[i].nodeid = i; + _pmixp_dconn_conns[i].state = PMIXP_DIRECT_INIT; + _pmixp_dconn_conns[i].priv = _pmixp_dconn_h.init(i, direct_hdr); ++ _pmixp_dconn_conns[i].uid = slurm_conf.slurmd_user_id; + } + return SLURM_SUCCESS; + } +--- a/src/plugins/mpi/pmix/pmixp_dconn.h ++++ b/src/plugins/mpi/pmix/pmixp_dconn.h +@@ -82,6 +82,9 @@ + /* remote node info */ + int nodeid; + void *priv; ++ ++ /* authenticated uid on remote */ ++ uid_t uid; + } pmixp_dconn_t; + + typedef void *(*pmixp_dconn_p2p_init_t)(int nodeid, +--- a/src/plugins/mpi/pmix/pmixp_server.c ++++ b/src/plugins/mpi/pmix/pmixp_server.c +@@ -495,13 +495,13 @@ + * --------------------- Authentication functionality ------------------- + */ + +-static int _auth_cred_create(Buf buf) ++static int _auth_cred_create(Buf buf, uid_t uid) + { + void *auth_cred = NULL; + int rc = SLURM_SUCCESS; + +- auth_cred = g_slurm_auth_create(AUTH_DEFAULT_INDEX, +- slurm_conf.authinfo); ++ auth_cred = g_slurm_auth_create(AUTH_DEFAULT_INDEX, slurm_conf.authinfo, ++ uid, NULL, 0); + if (!auth_cred) { + PMIXP_ERROR("Creating authentication credential: %m"); + return errno; +@@ -520,7 +520,7 @@ + return rc; + } + +-static int _auth_cred_verify(Buf buf) ++static int _auth_cred_verify(Buf buf, uid_t *uid) + { + void *auth_cred = NULL; + int rc = SLURM_SUCCESS; +@@ -537,8 +537,18 @@ + + rc = g_slurm_auth_verify(auth_cred, slurm_conf.authinfo); + +- if (rc) ++ if (rc) { + 
PMIXP_ERROR("Verifying authentication credential: %m"); ++ } else { ++ uid_t auth_uid; ++ auth_uid = g_slurm_auth_get_uid(auth_cred); ++ if ((auth_uid != slurm_conf.slurmd_user_id) && ++ (auth_uid != _pmixp_job_info.uid)) { ++ PMIXP_ERROR("Credential from uid %u", auth_uid); ++ rc = SLURM_ERROR; ++ } ++ *uid = auth_uid; ++ } + g_slurm_auth_destroy(auth_cred); + return rc; + } +@@ -705,7 +715,7 @@ + pmixp_base_hdr_t bhdr; + init_msg = xmalloc(sizeof(*init_msg)); + +- rc = _auth_cred_create(buf_init); ++ rc = _auth_cred_create(buf_init, dconn->uid); + if (rc) { + FREE_NULL_BUFFER(init_msg->buf_ptr); + xfree(init_msg); +@@ -1247,6 +1257,7 @@ + Buf buf_msg; + int rc; + char *nodename = NULL; ++ uid_t uid = SLURM_AUTH_NOBODY; + + if (!hdr->ext_flag) { + nodename = pmixp_info_job_host(hdr->nodeid); +@@ -1270,7 +1281,7 @@ + return; + } + /* Unpack and verify the auth credential */ +- rc = _auth_cred_verify(buf_msg); ++ rc = _auth_cred_verify(buf_msg, &uid); + FREE_NULL_BUFFER(buf_msg); + if (rc) { + close(fd); +@@ -1294,6 +1305,9 @@ + xfree(nodename); + return; + } ++ ++ dconn->uid = uid; ++ + new_conn = pmixp_conn_new_persist(PMIXP_PROTO_DIRECT, + pmixp_dconn_engine(dconn), + _direct_new_msg_conn, +--- a/src/plugins/mpi/pmix/pmixp_utils.c ++++ b/src/plugins/mpi/pmix/pmixp_utils.c +@@ -418,6 +418,7 @@ + msg.forward.timeout = timeout; + msg.forward.cnt = 0; + msg.forward.nodelist = NULL; ++ slurm_msg_set_r_uid(&msg, slurm_conf.slurmd_user_id); + ret_list = slurm_send_addr_recv_msgs(&msg, (char*)nodename, timeout); + if (!ret_list) { + /* This should never happen (when this was +--- a/src/sattach/sattach.c ++++ b/src/sattach/sattach.c +@@ -412,6 +412,7 @@ + reattach_msg.io_port = io_ports; + reattach_msg.cred = fake_cred; + ++ slurm_msg_set_r_uid(&msg, SLURM_AUTH_UID_ANY); + msg.msg_type = REQUEST_REATTACH_TASKS; + msg.data = &reattach_msg; + msg.protocol_version = layout->start_protocol_ver; +--- a/src/slurmctld/agent.c ++++ b/src/slurmctld/agent.c +@@ -144,6 +144,7 @@ + uint16_t retry; /* if set, keep trying */ + thd_t *thread_struct; /* thread structures */ + bool get_reply; /* flag if reply expected */ ++ uid_t r_uid; /* receiver UID */ + slurm_msg_type_t msg_type; /* RPC to be issued */ + void **msg_args_pptr; /* RPC data to be used */ + uint16_t protocol_version; /* if set, use this version */ +@@ -157,6 +158,7 @@ + uint32_t *threads_active_ptr; /* currently active thread ptr */ + thd_t *thread_struct_ptr; /* thread structures ptr */ + bool get_reply; /* flag if reply expected */ ++ uid_t r_uid; /* receiver UID */ + slurm_msg_type_t msg_type; /* RPC to be issued */ + void *msg_args_ptr; /* ptr to RPC data to be used */ + uint16_t protocol_version; /* if set, use this version */ +@@ -312,11 +314,12 @@ + /* start the watchdog thread */ + slurm_thread_create(&thread_wdog, _wdog, agent_info_ptr); + +- log_flag(AGENT, "%s: New agent thread_count:%d threads_active:%d retry:%c get_reply:%c msg_type:%s protocol_version:%hu", ++ log_flag(AGENT, "%s: New agent thread_count:%d threads_active:%d retry:%c get_reply:%c r_uid:%u msg_type:%s protocol_version:%hu", + __func__, agent_info_ptr->thread_count, + agent_info_ptr->threads_active, + agent_info_ptr->retry ? 'T' : 'F', + agent_info_ptr->get_reply ? 
'T' : 'F', ++ agent_info_ptr->r_uid, + rpc_num2string(agent_arg_ptr->msg_type), + agent_info_ptr->protocol_version); + +@@ -415,6 +418,11 @@ + __func__, agent_arg_ptr->node_count, hostlist_cnt); + return SLURM_ERROR; /* no messages to be sent */ + } ++ if (!agent_arg_ptr->r_uid_set) { ++ error("%s: r_uid not set for message:%u ", ++ __func__, agent_arg_ptr->msg_type); ++ return SLURM_ERROR; ++ } + return SLURM_SUCCESS; + } + +@@ -437,6 +445,7 @@ + thread_ptr = xcalloc(agent_info_ptr->thread_count, sizeof(thd_t)); + memset(thread_ptr, 0, (agent_info_ptr->thread_count * sizeof(thd_t))); + agent_info_ptr->thread_struct = thread_ptr; ++ agent_info_ptr->r_uid = agent_arg_ptr->r_uid; + agent_info_ptr->msg_type = agent_arg_ptr->msg_type; + agent_info_ptr->msg_args_pptr = &agent_arg_ptr->msg_args; + agent_info_ptr->protocol_version = agent_arg_ptr->protocol_version; +@@ -520,6 +529,7 @@ + task_info_ptr->threads_active_ptr= &agent_info_ptr->threads_active; + task_info_ptr->thread_struct_ptr = &agent_info_ptr->thread_struct[inx]; + task_info_ptr->get_reply = agent_info_ptr->get_reply; ++ task_info_ptr->r_uid = agent_info_ptr->r_uid; + task_info_ptr->msg_type = agent_info_ptr->msg_type; + task_info_ptr->msg_args_ptr = *agent_info_ptr->msg_args_pptr; + task_info_ptr->protocol_version = agent_info_ptr->protocol_version; +@@ -916,6 +926,7 @@ + + msg.msg_type = msg_type; + msg.data = task_ptr->msg_args_ptr; ++ slurm_msg_set_r_uid(&msg, task_ptr->r_uid); + + log_flag(AGENT, "%s: sending %s to %s", + __func__, rpc_num2string(msg_type), thread_ptr->nodelist); +@@ -1285,6 +1296,8 @@ + agent_arg_ptr->msg_args = *(agent_info_ptr->msg_args_pptr); + *(agent_info_ptr->msg_args_pptr) = NULL; + ++ set_agent_arg_r_uid(agent_arg_ptr, agent_info_ptr->r_uid); ++ + j = 0; + for (i = 0; i < agent_info_ptr->thread_count; i++) { + if (!thread_ptr[i].ret_list) { +@@ -2287,3 +2300,10 @@ + } + xfree(argv[1]); + } ++ ++/* Set r_uid of agent_arg */ ++extern void set_agent_arg_r_uid(agent_arg_t *agent_arg_ptr, uid_t r_uid) ++{ ++ agent_arg_ptr->r_uid = r_uid; ++ agent_arg_ptr->r_uid_set = true; ++} +--- a/src/slurmctld/agent.h ++++ b/src/slurmctld/agent.h +@@ -54,6 +54,8 @@ + uint32_t node_count; /* number of nodes to communicate + * with */ + uint16_t retry; /* if set, keep trying */ ++ uid_t r_uid; /* receiver UID */ ++ bool r_uid_set; /* True if receiver UID set*/ + slurm_addr_t *addr; /* if set will send to this + addr not hostlist */ + hostlist_t hostlist; /* hostlist containing the +@@ -114,4 +116,7 @@ + /* Return length of agent's retry_list */ + extern int retry_list_size(void); + ++/* Set r_uid of agent_arg */ ++extern void set_agent_arg_r_uid(agent_arg_t *agent_arg_ptr, uid_t r_uid); ++ + #endif /* !_AGENT_H */ +--- a/src/slurmctld/backup.c ++++ b/src/slurmctld/backup.c +@@ -392,8 +392,8 @@ + slurm_msg_t_init(&msg); + if (slurm_receive_msg(newsockfd, &msg, 0) != 0) + error("slurm_receive_msg: %m"); +- +- _background_process_msg(&msg); ++ else ++ _background_process_msg(&msg); + + slurm_free_msg_members(&msg); + +@@ -414,6 +414,10 @@ + int error_code = SLURM_SUCCESS; + bool send_rc = true; + ++ if (!msg->auth_uid_set) ++ fatal("%s: received message without previously validated auth", ++ __func__); ++ + if (msg->msg_type != REQUEST_PING) { + bool super_user = false; + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred); +@@ -460,6 +464,7 @@ + slurm_msg_t_init(&req); + slurm_set_addr(&req.address, ping->slurmctld_port, ping->control_addr); + req.msg_type = REQUEST_CONTROL_STATUS; ++ slurm_msg_set_r_uid(&req, 
SLURM_AUTH_UID_ANY); + if (slurm_send_recv_node_msg(&req, &resp, 0) == SLURM_SUCCESS) { + switch (resp.msg_type) { + case RESPONSE_CONTROL_STATUS: +@@ -601,6 +606,7 @@ + xfree(arg); + + slurm_msg_t_init(&req); ++ slurm_msg_set_r_uid(&req, slurm_conf.slurm_user_id); + slurm_set_addr(&req.address, slurm_conf.slurmctld_port, + slurm_conf.control_addr[shutdown_inx]); + if (do_shutdown) { +--- a/src/slurmctld/controller.c ++++ b/src/slurmctld/controller.c +@@ -1849,6 +1849,7 @@ + reboot_agent_args->hostlist); + debug("Queuing reboot request for nodes %s", host_str); + xfree(host_str); ++ set_agent_arg_r_uid(reboot_agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(reboot_agent_args); + last_node_update = now; + schedule_node_save(); +@@ -2710,6 +2711,7 @@ + xfree(arg); + + slurm_msg_t_init(&req); ++ slurm_msg_set_r_uid(&req, slurm_conf.slurm_user_id); + slurm_set_addr(&req.address, slurm_conf.slurmctld_port, + slurm_conf.control_addr[bu_inx]); + if (do_shutdown) { +--- a/src/slurmctld/fed_mgr.c ++++ b/src/slurmctld/fed_mgr.c +@@ -364,6 +364,8 @@ + persist_conn->rem_port = cluster->control_port; + } + ++ persist_conn->r_uid = SLURM_AUTH_UID_ANY; ++ + rc = slurm_persist_conn_open(persist_conn); + if (rc != SLURM_SUCCESS) { + if (_comm_fail_log(cluster)) { +--- a/src/slurmctld/job_mgr.c ++++ b/src/slurmctld/job_mgr.c +@@ -6076,6 +6076,7 @@ + signal_tasks_msg->signal = signal; + + agent_args->msg_args = signal_tasks_msg; ++ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(agent_args); + return; + } +@@ -14410,8 +14411,7 @@ + + reply: + if ((rc != ESLURM_JOB_SETTING_DB_INX) && (msg->conn_fd >= 0)) { +- slurm_msg_t_init(&resp_msg); +- resp_msg.protocol_version = msg->protocol_version; ++ response_init(&resp_msg, msg); + if (resp_array) { + resp_array_msg = _resp_array_xlate(resp_array, job_id); + resp_msg.msg_type = RESPONSE_JOB_ARRAY_ERRORS; +@@ -14421,7 +14421,6 @@ + rc_msg.return_code = rc; + resp_msg.data = &rc_msg; + } +- resp_msg.conn = msg->conn; + slurm_send_node_msg(msg->conn_fd, &resp_msg); + + if (resp_array_msg) { +@@ -14509,6 +14508,7 @@ + } + + agent_args->msg_args = kill_job; ++ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(agent_args); + return; + } +@@ -14904,6 +14904,7 @@ + agent_info->msg_type = REQUEST_ABORT_JOB; + agent_info->msg_args = kill_req; + ++ set_agent_arg_r_uid(agent_info, SLURM_AUTH_UID_ANY); + agent_queue_request(agent_info); + } + +@@ -14972,6 +14973,7 @@ + agent_info->msg_type = REQUEST_ABORT_JOB; + agent_info->msg_args = kill_req; + agent_info->protocol_version = protocol_version; ++ set_agent_arg_r_uid(agent_info, SLURM_AUTH_UID_ANY); + agent_queue_request(agent_info); + bit_free(tmp_node_bitmap); + } +@@ -15025,6 +15027,7 @@ + agent_info->msg_type = REQUEST_TERMINATE_JOB; + agent_info->msg_args = kill_req; + ++ set_agent_arg_r_uid(agent_info, SLURM_AUTH_UID_ANY); + agent_queue_request(agent_info); + } + +@@ -15968,6 +15971,7 @@ + } + + agent_args->msg_args = signal_job_msg; ++ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(agent_args); + return; + } +@@ -16047,6 +16051,7 @@ + } + + agent_args->msg_args = sus_ptr; ++ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(agent_args); + return; + } +@@ -16412,6 +16417,7 @@ + memset(&rc_msg, 0, sizeof(rc_msg)); + rc_msg.return_code = rc; + resp_msg.data = &rc_msg; ++ slurm_msg_set_r_uid(&resp_msg, uid); + slurm_send_node_msg(conn_fd, &resp_msg); + } + return rc; +@@ -16562,6 +16568,7 @@ + rc_msg.return_code = 
rc; + resp_msg.data = &rc_msg; + } ++ slurm_msg_set_r_uid(&resp_msg, uid); + slurm_send_node_msg(conn_fd, &resp_msg); + + if (resp_array_msg) { +@@ -17295,6 +17302,7 @@ + memset(&rc_msg, 0, sizeof(rc_msg)); + rc_msg.return_code = rc; + resp_msg.data = &rc_msg; ++ slurm_msg_set_r_uid(&resp_msg, uid); + slurm_send_node_msg(conn_fd, &resp_msg); + } + +--- a/src/slurmctld/job_scheduler.c ++++ b/src/slurmctld/job_scheduler.c +@@ -2591,6 +2591,7 @@ + agent_arg_ptr->hostlist = hostlist_create(launch_job_ptr->batch_host); + agent_arg_ptr->msg_type = REQUEST_BATCH_JOB_LAUNCH; + agent_arg_ptr->msg_args = (void *) launch_msg_ptr; ++ set_agent_arg_r_uid(agent_arg_ptr, SLURM_AUTH_UID_ANY); + + /* Launch the RPC via agent */ + agent_queue_request(agent_arg_ptr); +@@ -4317,6 +4318,7 @@ + rc = SLURM_ERROR; + } + xfree(nodes); ++ set_agent_arg_r_uid(reboot_agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(reboot_agent_args); + } + +@@ -4348,6 +4350,7 @@ + rc = SLURM_ERROR; + } + xfree(nodes); ++ set_agent_arg_r_uid(reboot_agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(reboot_agent_args); + } + +--- a/src/slurmctld/node_mgr.c ++++ b/src/slurmctld/node_mgr.c +@@ -3658,6 +3658,7 @@ + xfree (kill_agent_args); + } else { + debug ("Spawning agent msg_type=%d", msg_type); ++ set_agent_arg_r_uid(kill_agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(kill_agent_args); + } + } +@@ -3719,6 +3720,7 @@ + xfree(new_args); + } else { + debug("Spawning agent msg_type=%d", new_args->msg_type); ++ set_agent_arg_r_uid(new_args, SLURM_AUTH_UID_ANY); + agent_queue_request(new_args); + } + +@@ -3727,6 +3729,7 @@ + xfree(old_args); + } else { + debug("Spawning agent msg_type=%d", old_args->msg_type); ++ set_agent_arg_r_uid(old_args, SLURM_AUTH_UID_ANY); + agent_queue_request(old_args); + } + #else +--- a/src/slurmctld/node_scheduler.c ++++ b/src/slurmctld/node_scheduler.c +@@ -413,6 +413,7 @@ + } + + agent_args->msg_args = kill_job; ++ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(agent_args); + return; + } +@@ -2959,6 +2960,7 @@ + } + + /* Launch the RPC via agent */ ++ set_agent_arg_r_uid(agent_arg_ptr, SLURM_AUTH_UID_ANY); + agent_queue_request(agent_arg_ptr); + } + +@@ -4289,6 +4291,7 @@ + last_job_id = job_ptr->job_id; + hostlist_destroy(kill_hostlist); + agent_args->msg_args = kill_job; ++ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(agent_args); + return; + } +--- a/src/slurmctld/ping_nodes.c ++++ b/src/slurmctld/ping_nodes.c +@@ -340,6 +340,7 @@ + debug("Spawning ping agent for %s", host_str); + xfree(host_str); + ping_begin(); ++ set_agent_arg_r_uid(ping_agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(ping_agent_args); + } + +@@ -354,6 +355,7 @@ + host_str, reg_agent_args->node_count); + xfree(host_str); + ping_begin(); ++ set_agent_arg_r_uid(reg_agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(reg_agent_args); + } + +@@ -510,6 +512,7 @@ + debug("Spawning health check agent for %s", host_str); + xfree(host_str); + ping_begin(); ++ set_agent_arg_r_uid(check_agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(check_agent_args); + } + } +@@ -568,6 +571,7 @@ + log_flag(ENERGY, "Updating acct_gather data for %s", host_str); + xfree(host_str); + ping_begin(); ++ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY); + agent_queue_request(agent_args); + } + } +--- a/src/slurmctld/proc_req.c ++++ b/src/slurmctld/proc_req.c +@@ -229,6 +229,8 @@ + resp->conn = msg->conn; + resp->flags = msg->flags; + resp->protocol_version = 
msg->protocol_version; ++ if (msg->auth_uid_set) ++ slurm_msg_set_r_uid(resp, msg->auth_uid); + } + + /* +--- a/src/slurmctld/srun_comm.c ++++ b/src/slurmctld/srun_comm.c +@@ -57,7 +57,7 @@ + */ + static void _srun_agent_launch(slurm_addr_t *addr, char *host, + slurm_msg_type_t type, void *msg_args, +- uint16_t protocol_version) ++ uid_t r_uid, uint16_t protocol_version) + { + agent_arg_t *agent_args = xmalloc(sizeof(agent_arg_t)); + +@@ -67,6 +67,7 @@ + agent_args->hostlist = hostlist_create(host); + agent_args->msg_type = type; + agent_args->msg_args = msg_args; ++ set_agent_arg_r_uid(agent_args, r_uid); + agent_args->protocol_version = protocol_version; + + agent_queue_request(agent_args); +@@ -145,6 +146,7 @@ + msg_arg = build_alloc_msg(job_ptr, SLURM_SUCCESS, NULL); + _srun_agent_launch(addr, job_ptr->alloc_node, + RESPONSE_RESOURCE_ALLOCATION, msg_arg, ++ job_ptr->user_id, + job_ptr->start_protocol_ver); + } else if (_pending_het_jobs(job_ptr)) { + return; +@@ -169,6 +171,7 @@ + list_iterator_destroy(iter); + _srun_agent_launch(addr, job_ptr->alloc_node, + RESPONSE_HET_JOB_ALLOCATION, job_resp_list, ++ job_ptr->user_id, + job_ptr->start_protocol_ver); + } else { + error("%s: Can not find hetjob leader %pJ", +@@ -195,7 +198,7 @@ + msg_arg->step_het_comp = NO_VAL; + _srun_agent_launch(addr, job_ptr->alloc_node, + SRUN_JOB_COMPLETE, +- msg_arg, ++ msg_arg, job_ptr->user_id, + job_ptr->start_protocol_ver); + } + } +@@ -248,7 +251,8 @@ + sizeof(msg_arg->step_id)); + msg_arg->nodelist = xstrdup(node_name); + _srun_agent_launch(addr, step_ptr->host, SRUN_NODE_FAIL, +- msg_arg, step_ptr->start_protocol_ver); ++ msg_arg, job_ptr->user_id, ++ step_ptr->start_protocol_ver); + } + list_iterator_destroy(step_iterator); + +@@ -261,7 +265,8 @@ + msg_arg->step_id.step_het_comp = NO_VAL; + msg_arg->nodelist = xstrdup(node_name); + _srun_agent_launch(addr, job_ptr->alloc_node, SRUN_NODE_FAIL, +- msg_arg, job_ptr->start_protocol_ver); ++ msg_arg, job_ptr->user_id, ++ job_ptr->start_protocol_ver); + } + } + +@@ -297,7 +302,7 @@ + msg_arg = xmalloc(sizeof(srun_ping_msg_t)); + msg_arg->job_id = job_ptr->job_id; + _srun_agent_launch(addr, job_ptr->alloc_node, +- SRUN_PING, msg_arg, ++ SRUN_PING, msg_arg, job_ptr->user_id, + job_ptr->start_protocol_ver); + } + } +@@ -327,6 +332,7 @@ + memcpy(&msg_arg->step_id, &step_ptr->step_id, sizeof(msg_arg->step_id)); + msg_arg->timeout = timeout_val; + _srun_agent_launch(addr, step_ptr->host, SRUN_TIMEOUT, msg_arg, ++ step_ptr->job_ptr->user_id, + step_ptr->start_protocol_ver); + } + +@@ -354,7 +360,8 @@ + msg_arg->step_id.step_het_comp = NO_VAL; + msg_arg->timeout = job_ptr->end_time; + _srun_agent_launch(addr, job_ptr->alloc_node, SRUN_TIMEOUT, +- msg_arg, job_ptr->start_protocol_ver); ++ msg_arg, job_ptr->user_id, ++ job_ptr->start_protocol_ver); + } + + +@@ -384,7 +391,8 @@ + msg_arg->job_id = job_ptr->job_id; + msg_arg->msg = xstrdup(msg); + _srun_agent_launch(addr, job_ptr->resp_host, SRUN_USER_MSG, +- msg_arg, job_ptr->start_protocol_ver); ++ msg_arg, job_ptr->user_id, ++ job_ptr->start_protocol_ver); + return SLURM_SUCCESS; + } else if (job_ptr->batch_flag && IS_JOB_RUNNING(job_ptr)) { + #ifndef HAVE_FRONT_END +@@ -425,6 +433,7 @@ + agent_arg_ptr->msg_type = REQUEST_JOB_NOTIFY; + agent_arg_ptr->msg_args = (void *) notify_msg_ptr; + /* Launch the RPC via agent */ ++ set_agent_arg_r_uid(agent_arg_ptr, SLURM_AUTH_UID_ANY); + agent_queue_request(agent_arg_ptr); + return SLURM_SUCCESS; + } +@@ -453,6 +462,7 @@ + msg_arg->step_het_comp = NO_VAL; + 
_srun_agent_launch(addr, job_ptr->alloc_node,
+ SRUN_JOB_COMPLETE, msg_arg,
++ job_ptr->user_id,
+ job_ptr->start_protocol_ver);
+ }
+
+@@ -487,6 +497,7 @@
+ msg_arg->op = op;
+ _srun_agent_launch(addr, job_ptr->alloc_node,
+ SRUN_REQUEST_SUSPEND, msg_arg,
++ job_ptr->user_id,
+ job_ptr->start_protocol_ver);
+ msg_sent = true;
+ }
+@@ -510,7 +521,8 @@
+ memcpy(&msg_arg->step_id, &step_ptr->step_id,
+ sizeof(msg_arg->step_id));
+ _srun_agent_launch(addr, step_ptr->host, SRUN_JOB_COMPLETE,
+- msg_arg, step_ptr->start_protocol_ver);
++ msg_arg, step_ptr->job_ptr->user_id,
++ step_ptr->start_protocol_ver);
+ }
+ }
+
+@@ -534,7 +546,8 @@
+ sizeof(msg_arg->step_id));
+ msg_arg->nodelist = xstrdup(node_list);
+ _srun_agent_launch(addr, step_ptr->host, SRUN_STEP_MISSING,
+- msg_arg, step_ptr->start_protocol_ver);
++ msg_arg, step_ptr->job_ptr->user_id,
++ step_ptr->start_protocol_ver);
+ }
+ }
+
+@@ -558,7 +571,8 @@
+ sizeof(msg_arg->step_id));
+ msg_arg->signal = signal;
+ _srun_agent_launch(addr, step_ptr->host, SRUN_STEP_SIGNAL,
+- msg_arg, step_ptr->start_protocol_ver);
++ msg_arg, step_ptr->job_ptr->user_id,
++ step_ptr->start_protocol_ver);
+ }
+ }
+
+@@ -589,7 +603,8 @@
+ for (i=0; i<argc; i++)
+ msg_arg->argv[i] = xstrdup(argv[i]);
+ _srun_agent_launch(addr, step_ptr->host, SRUN_EXEC,
+- msg_arg, step_ptr->start_protocol_ver);
++ msg_arg, step_ptr->job_ptr->user_id,
++ step_ptr->start_protocol_ver);
+ } else {
+ error("srun_exec %pS lacks communication channel",
+ step_ptr);
+--- a/src/slurmctld/step_mgr.c
++++ b/src/slurmctld/step_mgr.c
+@@ -690,6 +690,7 @@
+ }
+
+ agent_args->msg_args = signal_tasks_msg;
++ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
+ agent_queue_request(agent_args);
+ return;
+ }
+@@ -735,6 +736,7 @@
+ sizeof(signal_tasks_msg->step_id));
+ signal_tasks_msg->signal = signal;
+ agent_args->msg_args = signal_tasks_msg;
++ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
+ agent_queue_request(agent_args);
+ return;
+ }
+@@ -4368,6 +4370,7 @@
+ }
+
+ agent_args->msg_args = kill_step;
++ set_agent_arg_r_uid(agent_args, SLURM_AUTH_UID_ANY);
+ agent_queue_request(agent_args);
+ return;
+ }
+--- a/src/slurmd/slurmd/req.c
++++ b/src/slurmd/slurmd/req.c
+@@ -431,7 +431,7 @@
+
+ static int
+ _send_slurmstepd_init(int fd, int type, void *req,
+- slurm_addr_t *cli, slurm_addr_t *self,
++ slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self,
+ hostset_t step_hset, uint16_t protocol_version)
+ {
+ int len = 0;
+@@ -564,6 +564,7 @@
+ safe_write(fd, get_buf_data(buffer), len);
+ free_buf(buffer);
+ buffer = NULL;
++ safe_write(fd, &cli_uid, sizeof(uid_t));
+
+ /* send self address over to slurmstepd */
+ if (self) {
+@@ -645,7 +646,7 @@
+ */
+ static int
+ _forkexec_slurmstepd(uint16_t type, void *req,
+- slurm_addr_t *cli, slurm_addr_t *self,
++ slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self,
+ const hostset_t step_hset, uint16_t protocol_version)
+ {
+ pid_t pid;
+@@ -687,7 +688,7 @@
+ error("Unable to close write to_slurmd in parent: %m");
+
+ if ((rc = _send_slurmstepd_init(to_stepd[1], type,
+- req, cli, self,
++ req, cli, cli_uid, self,
+ step_hset,
+ protocol_version)) != 0) {
+ error("Unable to init slurmstepd");
+@@ -1576,8 +1577,9 @@
+ }
+
+ debug3("%s: call to _forkexec_slurmstepd", __func__);
+- errnum = _forkexec_slurmstepd(LAUNCH_TASKS, (void *)req, cli, &self,
+- step_hset, msg->protocol_version);
++ errnum = _forkexec_slurmstepd(LAUNCH_TASKS, (void *)req, cli,
++ msg->auth_uid, &self, step_hset,
++ msg->protocol_version);
+ debug3("%s: return from _forkexec_slurmstepd", 
__func__); + _launch_complete_add(req->step_id.job_id); + +@@ -2167,7 +2169,7 @@ + + debug3("%s: call to _forkexec_slurmstepd", __func__); + rc = _forkexec_slurmstepd(LAUNCH_TASKS, (void *)launch_req, +- cli, &self, step_hset, ++ cli, msg->auth_uid, &self, step_hset, + msg->protocol_version); + debug3("%s: return from _forkexec_slurmstepd %d", + __func__, rc); +@@ -2521,8 +2523,9 @@ + info("Launching batch job %u for UID %u", req->job_id, req->uid); + + debug3("_rpc_batch_job: call to _forkexec_slurmstepd"); +- rc = _forkexec_slurmstepd(LAUNCH_BATCH_JOB, (void *)req, cli, NULL, +- (hostset_t)NULL, SLURM_PROTOCOL_VERSION); ++ rc = _forkexec_slurmstepd(LAUNCH_BATCH_JOB, (void *)req, cli, ++ msg->auth_uid, NULL, (hostset_t)NULL, ++ SLURM_PROTOCOL_VERSION); + debug3("_rpc_batch_job: return from _forkexec_slurmstepd: %d", rc); + + slurm_mutex_unlock(&launch_mutex); +@@ -4377,7 +4380,7 @@ + + /* Following call fills in gtids and local_pids when successful. */ + rc = stepd_attach(fd, protocol_version, &ioaddr, +- &resp_msg.address, job_cred_sig, resp); ++ &resp_msg.address, job_cred_sig, msg->auth_uid, resp); + if (rc != SLURM_SUCCESS) { + debug2("stepd_attach call failed"); + goto done2; +--- a/src/slurmd/slurmd/slurmd.c ++++ b/src/slurmd/slurmd/slurmd.c +@@ -573,7 +573,11 @@ + * to are taken care of and sent back. This way the control + * also has a better idea what happened to us + */ +- slurm_send_rc_msg(msg, rc); ++ if (msg->auth_uid_set) ++ slurm_send_rc_msg(msg, rc); ++ else ++ debug("%s: incomplete message", __func__); ++ + goto cleanup; + } + debug2("Start processing RPC: %s", rpc_num2string(msg->msg_type)); +--- a/src/slurmd/slurmstepd/io.c ++++ b/src/slurmd/slurmstepd/io.c +@@ -1919,6 +1919,7 @@ + slurm_msg_t_init(&msg); + msg.protocol_version = srun->protocol_version; + msg.msg_type = TASK_USER_MANAGED_IO_STREAM; ++ slurm_msg_set_r_uid(&msg, srun->uid); + msg.data = &user_io_msg; + user_io_msg.task_id = gtid; + +--- a/src/slurmd/slurmstepd/mgr.c ++++ b/src/slurmd/slurmstepd/mgr.c +@@ -164,7 +164,7 @@ + static bool _access(const char *path, int modes, uid_t uid, + int ngids, gid_t *gids); + static void _send_launch_failure(launch_tasks_request_msg_t *, +- slurm_addr_t *, int, uint16_t); ++ slurm_addr_t *, uid_t, int, uint16_t); + static int _fork_all_tasks(stepd_step_rec_t *job, bool *io_initialized); + static int _become_user(stepd_step_rec_t *job, struct priv_state *ps); + static void _set_prio_process (stepd_step_rec_t *job); +@@ -201,7 +201,8 @@ + */ + extern stepd_step_rec_t * + mgr_launch_tasks_setup(launch_tasks_request_msg_t *msg, slurm_addr_t *cli, +- slurm_addr_t *self, uint16_t protocol_version) ++ uid_t cli_uid, slurm_addr_t *self, ++ uint16_t protocol_version) + { + stepd_step_rec_t *job = NULL; + +@@ -212,7 +213,8 @@ + * reset in _send_launch_failure. + */ + int fail = errno; +- _send_launch_failure(msg, cli, errno, protocol_version); ++ _send_launch_failure(msg, cli, cli_uid, errno, ++ protocol_version); + errno = fail; + return NULL; + } +@@ -663,6 +665,7 @@ + /* This should always be set to something else we have a bug. 
+ 	xassert(srun->protocol_version);
+ 	resp.protocol_version = srun->protocol_version;
++	slurm_msg_set_r_uid(&resp, srun->uid);
+
+ 	if (_send_srun_resp_msg(&resp, job->nnodes) != SLURM_SUCCESS)
+ 		error("Failed to send MESSAGE_TASK_EXIT: %m");
+@@ -761,6 +764,7 @@
+ 	}
+ 	/*********************************************/
+ 	slurm_msg_t_init(&req);
++	slurm_msg_set_r_uid(&req, slurm_conf.slurmd_user_id);
+ 	req.msg_type = REQUEST_STEP_COMPLETE;
+ 	req.data = &msg;
+ 	req.address = step_complete.parent_addr;
+@@ -2336,8 +2340,8 @@
+ }
+
+ static void
+-_send_launch_failure(launch_tasks_request_msg_t *msg, slurm_addr_t *cli, int rc,
+-		     uint16_t protocol_version)
++_send_launch_failure(launch_tasks_request_msg_t *msg, slurm_addr_t *cli,
++		     uid_t cli_uid, int rc, uint16_t protocol_version)
+ {
+ 	slurm_msg_t resp_msg;
+ 	launch_tasks_response_msg_t resp;
+@@ -2372,6 +2376,7 @@
+ 	resp_msg.data = &resp;
+ 	resp_msg.msg_type = RESPONSE_LAUNCH_TASKS;
+ 	resp_msg.protocol_version = protocol_version;
++	slurm_msg_set_r_uid(&resp_msg, cli_uid);
+
+ 	memcpy(&resp.step_id, &msg->step_id, sizeof(resp.step_id));
+
+@@ -2400,6 +2405,7 @@
+
+ 	slurm_msg_t_init(&resp_msg);
+ 	resp_msg.address = srun->resp_addr;
++	slurm_msg_set_r_uid(&resp_msg, srun->uid);
+ 	resp_msg.protocol_version = srun->protocol_version;
+ 	resp_msg.data = &resp;
+ 	resp_msg.msg_type = RESPONSE_LAUNCH_TASKS;
+--- a/src/slurmd/slurmstepd/mgr.h
++++ b/src/slurmd/slurmstepd/mgr.h
+@@ -53,7 +53,7 @@
+  * Initialize a stepd_step_rec_t structure for a launch tasks
+  */
+ stepd_step_rec_t *mgr_launch_tasks_setup(launch_tasks_request_msg_t *msg,
+-					 slurm_addr_t *client,
++					 slurm_addr_t *cli, uid_t cli_uid,
+ 					 slurm_addr_t *self,
+ 					 uint16_t protocol_version);
+
+--- a/src/slurmd/slurmstepd/req.c
++++ b/src/slurmd/slurmstepd/req.c
+@@ -981,6 +981,7 @@
+ 	safe_read(fd, &srun->ioaddr, sizeof(slurm_addr_t));
+ 	safe_read(fd, &srun->resp_addr, sizeof(slurm_addr_t));
+ 	safe_read(fd, srun->key, SLURM_IO_KEY_SIZE);
++	safe_read(fd, &srun->uid, sizeof(uid_t));
+ 	safe_read(fd, &srun->protocol_version, sizeof(uint16_t));
+
+ 	if (!srun->protocol_version)
+--- a/src/slurmd/slurmstepd/slurmstepd.c
++++ b/src/slurmd/slurmstepd/slurmstepd.c
+@@ -75,15 +75,16 @@
+ #include "src/slurmd/slurmstepd/slurmstepd.h"
+ #include "src/slurmd/slurmstepd/slurmstepd_job.h"
+
+-static int _init_from_slurmd(int sock, char **argv, slurm_addr_t **_cli,
++static int _init_from_slurmd(int sock, char **argv,
++			     slurm_addr_t **_cli, uid_t *_cli_uid,
+ 			     slurm_addr_t **_self, slurm_msg_t **_msg);
+
+ static void _dump_user_env(void);
+ static void _send_ok_to_slurmd(int sock);
+ static void _send_fail_to_slurmd(int sock);
+ static void _got_ack_from_slurmd(int);
+-static stepd_step_rec_t *_step_setup(slurm_addr_t *cli, slurm_addr_t *self,
+-				     slurm_msg_t *msg);
++static stepd_step_rec_t *_step_setup(slurm_addr_t *cli, uid_t cli_uid,
++				     slurm_addr_t *self, slurm_msg_t *msg);
+ #ifdef MEMORY_LEAK_DEBUG
+ static void _step_cleanup(stepd_step_rec_t *job, slurm_msg_t *msg, int rc);
+ #endif
+@@ -107,6 +108,7 @@
+ {
+ 	log_options_t lopts = LOG_OPTS_INITIALIZER;
+ 	slurm_addr_t *cli;
++	uid_t cli_uid;
+ 	slurm_addr_t *self;
+ 	slurm_msg_t *msg;
+ 	stepd_step_rec_t *job;
+@@ -131,11 +133,11 @@
+ 		fatal( "failed to initialize authentication plugin" );
+
+ 	/* Receive job parameters from the slurmd */
+-	_init_from_slurmd(STDIN_FILENO, argv, &cli, &self, &msg);
++	_init_from_slurmd(STDIN_FILENO, argv, &cli, &cli_uid, &self, &msg);
+
+ 	/* Create the stepd_step_rec_t, mostly from info in a
+ 	 * launch_tasks_request_msg_t or a batch_job_launch_msg_t */
+-	if (!(job = _step_setup(cli, self, msg))) {
++	if (!(job = _step_setup(cli, cli_uid, self, msg))) {
+ 		_send_fail_to_slurmd(STDOUT_FILENO);
+ 		rc = SLURM_ERROR;
+ 		goto ending;
+@@ -501,7 +503,8 @@
+  */
+ static int
+ _init_from_slurmd(int sock, char **argv,
+-		  slurm_addr_t **_cli, slurm_addr_t **_self, slurm_msg_t **_msg)
++		  slurm_addr_t **_cli, uid_t *_cli_uid, slurm_addr_t **_self,
++		  slurm_msg_t **_msg)
+ {
+ 	char *incoming_buffer = NULL;
+ 	Buf buffer;
+@@ -509,6 +512,7 @@
+ 	int len;
+ 	uint16_t proto;
+ 	slurm_addr_t *cli = NULL;
++	uid_t cli_uid;
+ 	slurm_addr_t *self = NULL;
+ 	slurm_msg_t *msg = NULL;
+ 	slurm_step_id_t step_id = {
+@@ -560,6 +564,7 @@
+ 	if (slurm_unpack_addr_no_alloc(cli, buffer) == SLURM_ERROR)
+ 		fatal("slurmstepd: problem with unpack of slurmd_conf");
+ 	free_buf(buffer);
++	safe_read(sock, &cli_uid, sizeof(uid_t));
+
+ 	/* receive self from slurmd */
+ 	safe_read(sock, &len, sizeof(int));
+@@ -660,6 +665,7 @@
+ 	msg->protocol_version = proto;
+
+ 	*_cli = cli;
++	*_cli_uid = cli_uid;
+ 	*_self = self;
+ 	*_msg = msg;
+
+@@ -671,7 +677,8 @@
+ }
+
+ static stepd_step_rec_t *
+-_step_setup(slurm_addr_t *cli, slurm_addr_t *self, slurm_msg_t *msg)
++_step_setup(slurm_addr_t *cli, uid_t cli_uid, slurm_addr_t *self,
++	    slurm_msg_t *msg)
+ {
+ 	stepd_step_rec_t *job = NULL;
+
+@@ -682,7 +689,7 @@
+ 		break;
+ 	case REQUEST_LAUNCH_TASKS:
+ 		debug2("setup for a launch_task");
+-		job = mgr_launch_tasks_setup(msg->data, cli, self,
++		job = mgr_launch_tasks_setup(msg->data, cli, cli_uid, self,
+ 					     msg->protocol_version);
+ 		break;
+ 	default:
+--- a/src/slurmd/slurmstepd/slurmstepd_job.c
++++ b/src/slurmd/slurmstepd/slurmstepd_job.c
+@@ -443,7 +443,7 @@
+ 		memset(&io_addr, 0, sizeof(slurm_addr_t));
+ 	}
+
+-	srun = srun_info_create(msg->cred, &resp_addr, &io_addr,
++	srun = srun_info_create(msg->cred, &resp_addr, &io_addr, job->uid,
+ 				protocol_version);
+
+ 	job->profile = msg->profile;
+@@ -608,7 +608,7 @@
+ 	get_cred_gres(msg->cred, conf->node_name,
+ 		      &job->job_gres_list, &job->step_gres_list);
+
+-	srun = srun_info_create(NULL, NULL, NULL, NO_VAL16);
++	srun = srun_info_create(NULL, NULL, NULL, job->uid, NO_VAL16);
+
+ 	list_append(job->sruns, (void *) srun);
+
+@@ -696,7 +696,7 @@
+
+ extern srun_info_t *
+ srun_info_create(slurm_cred_t *cred, slurm_addr_t *resp_addr,
+-		 slurm_addr_t *ioaddr, uint16_t protocol_version)
++		 slurm_addr_t *ioaddr, uid_t uid, uint16_t protocol_version)
+ {
+ 	char *data = NULL;
+ 	uint32_t len = 0;
+@@ -707,6 +707,7 @@
+ 	if (!protocol_version || (protocol_version == NO_VAL16))
+ 		protocol_version = SLURM_PROTOCOL_VERSION;
+ 	srun->protocol_version = protocol_version;
++	srun->uid = uid;
+ 	/*
+ 	 * If no credential was provided, return the empty
+ 	 * srun info object. (This is used, for example, when
+--- a/src/slurmd/slurmstepd/slurmstepd_job.h
++++ b/src/slurmd/slurmstepd/slurmstepd_job.h
+@@ -68,6 +68,7 @@
+ 	slurm_addr_t ioaddr;	/* Address to connect on for normal I/O.
+ 				   Spawn IO uses messages to the normal
+ 				   resp_addr. */
++	uid_t uid;		/* user id for job */
+ 	uint16_t protocol_version; /* protocol_version of the srun */
+ } srun_info_t;
+
+@@ -262,7 +263,8 @@
+ void stepd_step_rec_destroy(stepd_step_rec_t *job);
+
+ srun_info_t * srun_info_create(slurm_cred_t *cred, slurm_addr_t *respaddr,
+-			       slurm_addr_t *ioaddr, uint16_t protocol_version);
++			       slurm_addr_t *ioaddr, uid_t uid,
++			       uint16_t protocol_version);
+
+ void srun_info_destroy(srun_info_t *srun);
+
+--- a/src/slurmd/slurmstepd/x11_forwarding.c
++++ b/src/slurmd/slurmstepd/x11_forwarding.c
+@@ -72,6 +72,8 @@
+
+ /* Target salloc/srun host/port */
+ static slurm_addr_t alloc_node;
++/* Target UID */
++static uid_t job_uid;
+ /* X11 display hostname on target, or UNIX socket. */
+ static char *x11_target = NULL;
+ /* X11 display port on target (if not a UNIX socket). */
+@@ -126,6 +128,7 @@
+ 	slurm_msg_t_init(&resp);
+
+ 	req.msg_type = SRUN_NET_FORWARD;
++	slurm_msg_set_r_uid(&req, job_uid);
+ 	req.data = &rpc;
+
+ 	slurm_send_recv_msg(*remote, &req, &resp, 0);
+@@ -237,6 +240,7 @@
+ 	x11_target_port = job->x11_target_port;
+
+ 	slurm_set_addr(&alloc_node, job->x11_alloc_port, job->x11_alloc_host);
++	job_uid = job->uid;
+
+ 	debug("X11Parameters: %s", slurm_conf.x11_params);
+
+--- a/src/slurmdbd/read_config.c
++++ b/src/slurmdbd/read_config.c
+@@ -660,6 +660,7 @@
+ 	if (!slurmdbd_conf->purge_usage)
+ 		slurmdbd_conf->purge_usage = NO_VAL;
+
++	slurm_conf.last_update = time(NULL);
+ 	slurm_mutex_unlock(&conf_mutex);
+ 	return SLURM_SUCCESS;
+ }
+--- a/src/slurmdbd/slurmdbd.c
++++ b/src/slurmdbd/slurmdbd.c
+@@ -850,6 +850,7 @@
+ 	} else {
+ 		slurm_msg_t out_msg;
+ 		slurm_msg_t_init(&out_msg);
++		slurm_msg_set_r_uid(&out_msg, SLURM_AUTH_UID_ANY);
+ 		out_msg.msg_type = ACCOUNTING_REGISTER_CTLD;
+ 		out_msg.flags = SLURM_GLOBAL_AUTH_KEY;
+ 		out_msg.protocol_version = cluster_rec->rpc_version;
diff -Nru slurm-wlm-20.11.7+really20.11.4/debian/patches/CVE-2022-29501 slurm-wlm-20.11.7+really20.11.4/debian/patches/CVE-2022-29501
--- slurm-wlm-20.11.7+really20.11.4/debian/patches/CVE-2022-29501	1970-01-01 00:00:00.000000000 +0000
+++ slurm-wlm-20.11.7+really20.11.4/debian/patches/CVE-2022-29501	2022-05-06 19:14:09.000000000 +0000
@@ -0,0 +1,152 @@
+Description: Fix CVE-2022-29501
+ Prevent abuse of REQUEST_FORWARD_DATA.
+Author: Dominik Bartkiewicz
+Last-Update: 2022-05-04
+
+--- a/src/plugins/mpi/pmi2/setup.c
++++ b/src/plugins/mpi/pmi2/setup.c
+@@ -339,6 +339,11 @@
+ 		unlink(sa.sun_path);
+ 		return SLURM_ERROR;
+ 	}
++	if (chown(sa.sun_path, job->uid, -1) < 0) {
++		error("mpi/pmi2: failed to chown tree socket: %m");
++		unlink(sa.sun_path);
++		return SLURM_ERROR;
++	}
+ 	if (listen(tree_sock, 64) < 0) {
+ 		error("mpi/pmi2: failed to listen tree socket: %m");
+ 		unlink(sa.sun_path);
+--- a/src/slurmd/slurmd/req.c
++++ b/src/slurmd/slurmd/req.c
+@@ -1712,6 +1712,88 @@
+ 	exit(SLURM_SUCCESS);
+ }
+
++/*
++ * Connect to unix socket based upon permissions of a different user
++ * IN sock_name - name of socket to open
++ * IN uid - User ID to use for file access check
++ * IN gid - Group ID to use for file access check
++ * OUT fd - File descriptor
++ * RET error or SLURM_SUCCESS
++ * */
++static int _connect_as_other(char *sock_name, uid_t uid, gid_t gid, int *fd)
++{
++	pid_t child;
++	int pipe[2];
++	int rc = 0;
++	struct sockaddr_un sa;
++
++	*fd = -1;
++	if (strlen(sock_name) >= sizeof(sa.sun_path)) {
++		error("%s: Unix socket path '%s' is too long. (%ld > %ld)",
(%ld > %ld)", ++ __func__, sock_name, ++ (long int)(strlen(sock_name) + 1), ++ (long int)sizeof(sa.sun_path)); ++ return EINVAL; ++ } ++ ++ /* child process will setuid to the user, register the process ++ * with the container, and open the file for us. */ ++ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pipe) != 0) { ++ error("%s: Failed to open pipe: %m", __func__); ++ return SLURM_ERROR; ++ } ++ ++ child = fork(); ++ if (child == -1) { ++ error("%s: fork failure", __func__); ++ close(pipe[0]); ++ close(pipe[1]); ++ return SLURM_ERROR; ++ } else if (child > 0) { ++ int exit_status = -1; ++ close(pipe[0]); ++ (void) waitpid(child, &rc, 0); ++ if (WIFEXITED(rc) && (WEXITSTATUS(rc) == 0)) ++ *fd = receive_fd_over_pipe(pipe[1]); ++ exit_status = WEXITSTATUS(rc); ++ close(pipe[1]); ++ return exit_status; ++ } ++ ++ /* child process below here */ ++ ++ close(pipe[1]); ++ ++ if (setgid(gid) < 0) { ++ error("%s: uid:%u setgid(%u): %m", __func__, uid, gid); ++ _exit(errno); ++ } ++ if (setuid(uid) < 0) { ++ error("%s: getuid(%u): %m", __func__, uid); ++ _exit(errno); ++ } ++ ++ *fd = socket(AF_UNIX, SOCK_STREAM, 0); ++ if (*fd < 0) { ++ error("%s:failed creating UNIX domain socket: %m", __func__ ); ++ _exit(errno); ++ } ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.sun_family = AF_UNIX; ++ strcpy(sa.sun_path, sock_name); ++ while (((rc = connect(*fd, (struct sockaddr *)&sa, ++ SUN_LEN(&sa))) < 0) && (errno == EINTR)); ++ ++ if (rc < 0) { ++ debug2("%s: failed connecting to specified socket '%s': %m", ++ __func__, sock_name); ++ _exit(errno); ++ } ++ send_fd_over_pipe(pipe[0], *fd); ++ close(*fd); ++ _exit(SLURM_SUCCESS); ++} + + static void + _prolog_error(batch_job_launch_msg_t *req, int rc) +@@ -5883,7 +5965,7 @@ + { + forward_data_msg_t *req = (forward_data_msg_t *)msg->data; + uint32_t req_uid = (uint32_t) g_slurm_auth_get_uid(msg->auth_cred); +- struct sockaddr_un sa; ++ uint32_t req_gid = (uint32_t) g_slurm_auth_get_gid(msg->auth_cred); + int fd = -1, rc = 0; + + /* Make sure we adjust for the spool dir coming in on the address to +@@ -5894,31 +5976,8 @@ + debug3("Entering _rpc_forward_data, address: %s, len: %u", + req->address, req->len); + +- /* +- * If socket name would be truncated, emit error and exit +- */ +- if (strlen(req->address) >= sizeof(sa.sun_path)) { +- error("%s: Unix socket path '%s' is too long. (%ld > %ld)", +- __func__, req->address, +- (long int)(strlen(req->address) + 1), +- (long int)sizeof(sa.sun_path)); +- slurm_seterrno(EINVAL); +- rc = errno; +- goto done; +- } ++ rc = _connect_as_other(req->address, req_uid, req_gid, &fd); + +- /* connect to specified address */ +- fd = socket(AF_UNIX, SOCK_STREAM, 0); +- if (fd < 0) { +- rc = errno; +- error("failed creating UNIX domain socket: %m"); +- goto done; +- } +- memset(&sa, 0, sizeof(sa)); +- sa.sun_family = AF_UNIX; +- strcpy(sa.sun_path, req->address); +- while (((rc = connect(fd, (struct sockaddr *)&sa, SUN_LEN(&sa))) < 0) && +- (errno == EINTR)); + if (rc < 0) { + rc = errno; + debug2("failed connecting to specified socket '%s': %m", diff -Nru slurm-wlm-20.11.7+really20.11.4/debian/patches/series slurm-wlm-20.11.7+really20.11.4/debian/patches/series --- slurm-wlm-20.11.7+really20.11.4/debian/patches/series 2021-07-14 09:49:22.000000000 +0000 +++ slurm-wlm-20.11.7+really20.11.4/debian/patches/series 2022-05-06 19:14:09.000000000 +0000 @@ -7,3 +7,5 @@ fix-typos pmixv4 CVE-2021-31215 +CVE-2022-29500 +CVE-2022-29501