- 28 Sep, 2016 10 commits
-
-
Morris Jette authored
Add "flags" field to launch_tasks_request_msg. Remove the following fields (moved into flags): multi_prog, task_flags, user_managed_io, pty, buffered_stdio, and labelio. More flags to be added later.
-
Brian Christiansen authored
-
Brian Christiansen authored
*** CID 149289: Null pointer dereferences (REVERSE_INULL) /slurm/src/slurmctld/step_mgr.c: 1993 in _step_dealloc_lps() 1987 xassert(job_resrcs_ptr->cpus); 1988 xassert(job_resrcs_ptr->cpus_used); 1989 1990 if (step_ptr->step_layout == NULL) /* batch step */ 1991 return; 1992 >>> CID 149289: Null pointer dereferences (REVERSE_INULL) >>> Null-checking "job_resrcs_ptr" suggests that it may be null, but it has already been dereferenced on all paths leading to the check. 1993 if (job_resrcs_ptr == NULL) 1994 return; 1995 i_first = bit_ffs(job_resrcs_ptr->node_bitmap); 1996 i_last = bit_fls(job_resrcs_ptr->node_bitmap); 1997 if (i_first == -1) /* empty bitmap */ 1998 return; ** CID 149288: Resource leaks (RESOURCE_LEAK) /slurm/src/api/reconfigure.c: 171 in _send_message_controller() ________________________________________________________________________________________________________ *** CID 149288: Resource leaks (RESOURCE_LEAK) /slurm/src/api/reconfigure.c: 171 in _send_message_controller() 165 } 166 resp_msg = xmalloc(sizeof(slurm_msg_t)); 167 slurm_msg_t_init(resp_msg); 168 169 if ((rc = slurm_receive_msg(fd, resp_msg, 0)) != 0) { 170 slurm_shutdown_msg_conn(fd); >>> CID 149288: Resource leaks (RESOURCE_LEAK) >>> Variable "resp_msg" going out of scope leaks the storage it points to. 
171 return SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR; 172 } 173 174 if (slurm_shutdown_msg_conn(fd) != SLURM_SUCCESS) 175 rc = SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR; 176 else if (resp_msg->msg_type != RESPONSE_SLURM_RC) ** CID 149287: Integer handling issues (NEGATIVE_RETURNS) /slurm/src/slurmdbd/backup.c: 65 in run_dbd_backup() ________________________________________________________________________________________________________ *** CID 149287: Integer handling issues (NEGATIVE_RETURNS) /slurm/src/slurmdbd/backup.c: 65 in run_dbd_backup() 59 primary_resumed = false; 60 61 memset(&slurmdbd_conn, 0, sizeof(slurm_persist_conn_t)); 62 slurmdbd_conn.rem_host = slurmdbd_conf->dbd_addr; 63 slurmdbd_conn.rem_port = slurmdbd_conf->dbd_port; 64 slurmdbd_conn.cluster_name = "backup_slurmdbd"; >>> CID 149287: Integer handling issues (NEGATIVE_RETURNS) >>> Assigning: "slurmdbd_conn.fd" = a negative value. 65 slurmdbd_conn.fd = -1; 66 slurmdbd_conn.shutdown = &shutdown_time; 67 68 slurm_persist_conn_open_without_init(&slurmdbd_conn); 69 70 /* repeatedly ping Primary */ ** CID 149286: Integer handling issues (INCOMPATIBLE_CAST) ________________________________________________________________________________________________________ *** CID 149286: Integer handling issues (INCOMPATIBLE_CAST) /slurm/src/common/slurmdbd_defs.c: 3096 in slurmdbd_unpack_init_msg() 3090 3091 *msg = msg_ptr; 3092 3093 /* We don't use rollback going forward and version was packed after it 3094 * unfortunately */ 3095 if (rpc_version < SLURM_17_02_PROTOCOL_VERSION) >>> CID 149286: Integer handling issues (INCOMPATIBLE_CAST) >>> Pointer "&tmp32" points to an object whose effective type is "unsigned int" (32 bits, unsigned) but is dereferenced as a narrower "unsigned short" (16 bits, unsigned). This may lead to unexpected results depending on machine endianness. 
3096 safe_unpack16((uint16_t *)&tmp32, buffer); 3097 safe_unpack16(&msg_ptr->version, buffer); 3098 safe_unpackstr_xmalloc(&msg_ptr->cluster_name, &tmp32, buffer); 3099 3100 /* We find out the version of the caller right here so use 3101 that as the rpc_version. */ ** CID 149285: Null pointer dereferences (FORWARD_NULL) /slurm/src/common/slurm_persist_conn.c: 588 in slurm_persist_conn_open() ________________________________________________________________________________________________________ *** CID 149285: Null pointer dereferences (FORWARD_NULL) /slurm/src/common/slurm_persist_conn.c: 588 in slurm_persist_conn_open() 582 error("%s: Failed to unpack persistent connection init resp message from %s:%d", 583 __func__, 584 persist_conn->rem_host, 585 persist_conn->rem_port); 586 _close_fd(&persist_conn->fd); 587 } else >>> CID 149285: Null pointer dereferences (FORWARD_NULL) >>> Dereferencing null pointer "resp". 588 persist_conn->version = resp->ret_info; 589 } 590 591 end_it: 592 593 slurm_persist_free_rc_msg(resp); ** CID 149284: Error handling issues (CHECKED_RETURN) /slurm/src/common/slurmdbd_defs.c: 1665 in _send_fini_msg() ________________________________________________________________________________________________________ *** CID 149284: Error handling issues (CHECKED_RETURN) /slurm/src/common/slurmdbd_defs.c: 1665 in _send_fini_msg() 1659 buffer = init_buf(1024); 1660 pack16((uint16_t) DBD_FINI, buffer); 1661 req.commit = 0; 1662 req.close_conn = 1; 1663 slurmdbd_pack_fini_msg(&req, SLURM_PROTOCOL_VERSION, buffer); 1664 >>> CID 149284: Error handling issues (CHECKED_RETURN) >>> Calling "slurm_persist_send_msg" without checking return value (as is done elsewhere 5 out of 6 times). 1665 slurm_persist_send_msg(slurmdbd_conn, buffer); 1666 free_buf(buffer); 1667 1668 return SLURM_SUCCESS; 1669 } 1670
-
Morris Jette authored
-
Tim Wickberg authored
Leave macros defined for now to avoid bit reuse.
-
Tim Wickberg authored
Leave the select/bluegene directory alone, as it will be removed entirely once BG/Q support is removed, and the cleanup isn't worth the risk as that code is rarely changed.
-
Tim Wickberg authored
-
Tim Wickberg authored
Remove from build system, and delete L/P specific files. Run autogen.sh as well.
-
Tim Wickberg authored
Add in missing brackets. Caught by GCC 6.1 -Wall.
-
Morris Jette authored
Add "sbatch_wait_nodes" to SchedulerParameters to control default sbatch behaviour with respect to waiting for all allocated nodes to be ready for use. Job can override the configuration option using the --wait-all-nodes=# option. bug 3120
-
- 27 Sep, 2016 5 commits
-
-
Morris Jette authored
Prior logic would treat an execute line like this: $ sbatch --wait-all-nodes -N3 tmp with "-N3" as being the argument to the "--wait-all-nodes" option. See bug 3120
-
Morris Jette authored
-
Morris Jette authored
Add salloc/sbatch/srun option --use-min-nodes to prefer smaller node counts when a range of node counts is specified (e.g. "-N 2-4"). bug 2996
-
Morris Jette authored
-
Tim Wickberg authored
Switch a few SLURM mentions for Slurm as well.
-
- 26 Sep, 2016 7 commits
-
-
Morris Jette authored
-
Morris Jette authored
It was out of alphabetic order before (e.g. after --power).
-
Morris Jette authored
Add salloc/sbatch/srun --priority option of "TOP" to set job priority to the highest possible value. This option is only available to Slurm operators and administrators. bug 3115
-
Morris Jette authored
The problem reported is just a configuration warning and not an error. Also change the test from ">=" to ">". bug 3086
-
Morris Jette authored
-
Morris Jette authored
-
Morris Jette authored
This patch finally resolves absolute/relative CPU mapping for nodes where the NUMA (or sockets) have different core counts (e.g. KNL SNC4).
-
- 25 Sep, 2016 2 commits
-
-
Morris Jette authored
-
Morris Jette authored
-
- 24 Sep, 2016 6 commits
-
-
Morris Jette authored
-
Morris Jette authored
-
Morris Jette authored
-
Morris Jette authored
bug 3090
-
Morris Jette authored
Cray was having problems with this for some reason
-
Morris Jette authored
Make sure no attempt is made to schedule a requeued job until all steps are cleaned (Node Health Check completes for all steps on a Cray). bug 3082
-
- 23 Sep, 2016 9 commits
-
-
Tim Wickberg authored
-
Brian Christiansen authored
-
Brian Christiansen authored
-
Brian Christiansen authored
-
Brian Christiansen authored
-
Brian Christiansen authored
unpacking fed_rec into an empty allocated fed_rec.
-
Brian Christiansen authored
-
Brian Christiansen authored
-
Morris Jette authored
Make sure no attempt is made to schedule a requeued job until all steps are cleaned (Node Health Check completes for all steps on a Cray). bug 3082
-
- 22 Sep, 2016 1 commit
-
-
Tim Wickberg authored
Conflicts: src/slurmctld/job_mgr.c
-