1. 29 Sep, 2016 23 commits
  2. 28 Sep, 2016 12 commits
    • Morris Jette's avatar
      Merge branch 'slurm-16.05' · e20c948d
      Morris Jette authored
      e20c948d
    • Morris Jette's avatar
      Fix test to work with DefMemNode configured · 494814e2
      Morris Jette authored
      Added memory limits to the job and step. Without these, only one
        step may be able to run at a time, which breaks the test.
      494814e2
    • Morris Jette's avatar
      Add task launch flag field · 3251406c
      Morris Jette authored
      Add "flag" field to launch_tasks_request_msg. Remove the following fields
          (moved into flags): multi_prog, task_flags, user_managed_io, pty,
          buffered_stdio, and labelio. More flags to be added later.
      3251406c
    • Brian Christiansen's avatar
      Fix clang reported issues · ab40ac9c
      Brian Christiansen authored
      ab40ac9c
    • Brian Christiansen's avatar
      Fix issues reported by coverity · 9164e65a
      Brian Christiansen authored
      *** CID 149289:  Null pointer dereferences  (REVERSE_INULL)
      /slurm/src/slurmctld/step_mgr.c: 1993 in _step_dealloc_lps()
      1987     	xassert(job_resrcs_ptr->cpus);
      1988     	xassert(job_resrcs_ptr->cpus_used);
      1989
      1990     	if (step_ptr->step_layout == NULL)	/* batch step */
      1991     		return;
      1992
      >>>     CID 149289:  Null pointer dereferences  (REVERSE_INULL)
      >>>     Null-checking "job_resrcs_ptr" suggests that it may be null, but it has already been dereferenced on all paths leading to the check.
      1993     	if (job_resrcs_ptr == NULL)
      1994     		return;
      1995     	i_first = bit_ffs(job_resrcs_ptr->node_bitmap);
      1996     	i_last  = bit_fls(job_resrcs_ptr->node_bitmap);
      1997     	if (i_first == -1)	/* empty bitmap */
      1998     		return;
      
      ** CID 149288:  Resource leaks  (RESOURCE_LEAK)
      /slurm/src/api/reconfigure.c: 171 in _send_message_controller()
      
      ________________________________________________________________________________________________________
      *** CID 149288:  Resource leaks  (RESOURCE_LEAK)
      /slurm/src/api/reconfigure.c: 171 in _send_message_controller()
      165     	}
      166     	resp_msg = xmalloc(sizeof(slurm_msg_t));
      167     	slurm_msg_t_init(resp_msg);
      168
      169     	if ((rc = slurm_receive_msg(fd, resp_msg, 0)) != 0) {
      170     		slurm_shutdown_msg_conn(fd);
      >>>     CID 149288:  Resource leaks  (RESOURCE_LEAK)
      >>>     Variable "resp_msg" going out of scope leaks the storage it points to.
      171     		return SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR;
      172     	}
      173
      174     	if (slurm_shutdown_msg_conn(fd) != SLURM_SUCCESS)
      175     		rc = SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR;
      176     	else if (resp_msg->msg_type != RESPONSE_SLURM_RC)
      
      ** CID 149287:  Integer handling issues  (NEGATIVE_RETURNS)
      /slurm/src/slurmdbd/backup.c: 65 in run_dbd_backup()
      
      ________________________________________________________________________________________________________
      *** CID 149287:  Integer handling issues  (NEGATIVE_RETURNS)
      /slurm/src/slurmdbd/backup.c: 65 in run_dbd_backup()
      59     	primary_resumed = false;
      60
      61     	memset(&slurmdbd_conn, 0, sizeof(slurm_persist_conn_t));
      62     	slurmdbd_conn.rem_host = slurmdbd_conf->dbd_addr;
      63     	slurmdbd_conn.rem_port = slurmdbd_conf->dbd_port;
      64     	slurmdbd_conn.cluster_name = "backup_slurmdbd";
      >>>     CID 149287:  Integer handling issues  (NEGATIVE_RETURNS)
      >>>     Assigning: "slurmdbd_conn.fd" = a negative value.
      65     	slurmdbd_conn.fd = -1;
      66     	slurmdbd_conn.shutdown = &shutdown_time;
      67
      68     	slurm_persist_conn_open_without_init(&slurmdbd_conn);
      69
      70     	/* repeatedly ping Primary */
      
      ** CID 149286:  Integer handling issues  (INCOMPATIBLE_CAST)
      /slurm/src/common/slurmdbd_defs.c: 3096 in slurmdbd_unpack_init_msg()
      
      ________________________________________________________________________________________________________
      *** CID 149286:  Integer handling issues  (INCOMPATIBLE_CAST)
      /slurm/src/common/slurmdbd_defs.c: 3096 in slurmdbd_unpack_init_msg()
      3090
      3091     	*msg = msg_ptr;
      3092
      3093     	/* We don't use rollback going forward and version was packed after it
      3094     	 * unfortunately */
      3095     	if (rpc_version < SLURM_17_02_PROTOCOL_VERSION)
      >>>     CID 149286:  Integer handling issues  (INCOMPATIBLE_CAST)
      >>>     Pointer "&tmp32" points to an object whose effective type is "unsigned int" (32 bits, unsigned) but is dereferenced as a narrower "unsigned short" (16 bits, unsigned).  This may lead to unexpected results depending on machine endianness.
      3096     		safe_unpack16((uint16_t *)&tmp32, buffer);
      3097     	safe_unpack16(&msg_ptr->version, buffer);
      3098     	safe_unpackstr_xmalloc(&msg_ptr->cluster_name, &tmp32, buffer);
      3099
      3100     	/* We find out the version of the caller right here so use
      3101     	   that as the rpc_version. */
      
      ** CID 149285:  Null pointer dereferences  (FORWARD_NULL)
      /slurm/src/common/slurm_persist_conn.c: 588 in slurm_persist_conn_open()
      
      ________________________________________________________________________________________________________
      *** CID 149285:  Null pointer dereferences  (FORWARD_NULL)
      /slurm/src/common/slurm_persist_conn.c: 588 in slurm_persist_conn_open()
      582     				error("%s: Failed to unpack persistent connection init resp message from %s:%d",
      583     				      __func__,
      584     				      persist_conn->rem_host,
      585     				      persist_conn->rem_port);
      586     			_close_fd(&persist_conn->fd);
      587     		} else
      >>>     CID 149285:  Null pointer dereferences  (FORWARD_NULL)
      >>>     Dereferencing null pointer "resp".
      588     			persist_conn->version = resp->ret_info;
      589     	}
      590
      591     end_it:
      592
      593     	slurm_persist_free_rc_msg(resp);
      
      ** CID 149284:  Error handling issues  (CHECKED_RETURN)
      /slurm/src/common/slurmdbd_defs.c: 1665 in _send_fini_msg()
      
      ________________________________________________________________________________________________________
      *** CID 149284:  Error handling issues  (CHECKED_RETURN)
      /slurm/src/common/slurmdbd_defs.c: 1665 in _send_fini_msg()
      1659     	buffer = init_buf(1024);
      1660     	pack16((uint16_t) DBD_FINI, buffer);
      1661     	req.commit  = 0;
      1662     	req.close_conn   = 1;
      1663     	slurmdbd_pack_fini_msg(&req, SLURM_PROTOCOL_VERSION, buffer);
      1664
      >>>     CID 149284:  Error handling issues  (CHECKED_RETURN)
      >>>     Calling "slurm_persist_send_msg" without checking return value (as is done elsewhere 5 out of 6 times).
      1665     	slurm_persist_send_msg(slurmdbd_conn, buffer);
      1666     	free_buf(buffer);
      1667
      1668     	return SLURM_SUCCESS;
      1669     }
      1670
      9164e65a
    • Morris Jette's avatar
      Merge branch 'slurm-16.05' · 8fcdceeb
      Morris Jette authored
      8fcdceeb
    • Tim Wickberg's avatar
      Remove use of CLUSTER_FLAG_BGL and CLUSTER_FLAG_BGP flags. · eec5bccb
      Tim Wickberg authored
      Leave macros defined for now to avoid bit reuse.
      eec5bccb
    • Tim Wickberg's avatar
      Remove HAVE_BGL HAVE_BGP HAVE_BG_L_P macros. · 186df5d1
      Tim Wickberg authored
      Leave the select/bluegene directory alone, as it will be removed
      entirely once BG/Q support is removed, and the cleanup isn't worth
      the risk as that code is rarely changed.
      186df5d1
    • Tim Wickberg's avatar
    • Tim Wickberg's avatar
      Remove BlueGene/L and BlueGene/P support. · b818dd9d
      Tim Wickberg authored
      Remove from build system, and delete L/P specific files.
      Run autogen.sh as well.
      b818dd9d
    • Tim Wickberg's avatar
      Prevent 'sacctmgr shutdown' from always returning 1. · d1ea0b49
      Tim Wickberg authored
      Add in missing brackets. Caught by GCC 6.1 -Wall.
      d1ea0b49
    • Morris Jette's avatar
      Add configuration parameter to control default wait-all-nodes option · 68299d7d
      Morris Jette authored
      Add "sbatch_wait_nodes" to SchedulerParameters to control default sbatch
          behaviour with respect to waiting for all allocated nodes to be ready for
          use. Job can override the configuration option using the --wait-all-nodes=#
          option.
      bug 3120
      68299d7d
  3. 27 Sep, 2016 5 commits