This patch was submitted by Shantanu Goel in a Trac ticket. Sending to the list for review. I haven't yet had a chance to dig into it myself yet, but the concept as described in https://fedorahosted.org/sssd/ticket/1354 is sound, so I'd like to see this get cleaned up and included.
On Wed, 2012-05-30 at 08:23 -0400, Stephen Gallagher wrote:
This patch was submitted by Shantanu Goel in a Trac ticket. Sending to the list for review. I haven't yet had a chance to dig into it myself yet, but the concept as described in https://fedorahosted.org/sssd/ticket/1354 is sound, so I'd like to see this get cleaned up and included.
Comments inline, but I can already say I want more comments in the code, there are none.
differences between files attachment (sssd-1.5.1-expire-idle-connection.diff)
--- .gen-patch.27968/src/confdb/confdb.h 2012-04-20 14:01:34.477465315 -0400 +++ sssd-1.5.1/src/confdb/confdb.h 2012-04-20 14:01:45.069779773 -0400 @@ -78,6 +78,7 @@ #define CONFDB_NSS_VETOED_SHELL "vetoed_shells" #define CONFDB_NSS_ALLOWED_SHELL "allowed_shells" #define CONFDB_NSS_SHELL_FALLBACK "shell_fallback" +#define CONFDB_NSS_IDLE_TIMEOUT "idle_timeout"
/* PAM */ #define CONFDB_PAM_CONF_ENTRY "config/pam" --- .gen-patch.27968/src/responder/common/responder_common.c 2012-04-20 14:01:34.478465251 -0400 +++ sssd-1.5.1/src/responder/common/responder_common.c 2012-04-20 14:01:45.070779708 -0400 @@ -88,9 +88,82 @@ return EOK; }
+static void idle_handler(struct tevent_context *,
struct tevent_timer *,
struct timeval,
void *);
+static void start_timer(struct resp_ctx *rctx, struct timeval *tvp) +{
- struct timeval tv;
- if (rctx->idle_timeout <= 0)
return;
- if (rctx->timer)
return;
- if (!rctx->cctx)
return;
Style guidelines violations ^^ We use: if (x) { blah }
- if (!tvp) {
tvp = &tv;
gettimeofday(tvp, NULL);
- }
Please use tevent_timeval_current() here ^
- tvp->tv_sec += rctx->idle_timeout;
- rctx->timer = tevent_add_timer(rctx->cctx->ev, rctx,
*tvp, idle_handler, rctx);
- if (!rctx->timer)
DEBUG(0, ("could not start idle timer\n"));
+}
+static void idle_handler(struct tevent_context *ev,
struct tevent_timer *te,
struct timeval current_time,
void *data)
+{
- struct resp_ctx *rctx = data;
- struct cli_ctx *cctx = rctx->cctx;
- int n = 0;
- while (cctx) {
struct cli_ctx *cctx_next = cctx->next;
if (cctx->active || cctx->creq)
cctx->active = 0;
else if (talloc_free(cctx) == 0)
n++;
sever style violation ^^ as well, plus I want a comment on top that explain why we are doing an either or.
Also what's the point of this check on talloc_free() ? It is not like we have any options left if it fails.
Also please use a for loop and a continue after the if (), will be much more readable.
cctx = cctx_next;
- }
- if (n)
DEBUG(2, ("terminated %d idle connecions\n", n));
braces ^^
- /*
* The timer event is freed by the library.
*/
- rctx->timer = NULL;
- start_timer(rctx, ¤t_time);
+}
+static void client_add(struct cli_ctx *cctx) +{
- struct resp_ctx *rctx = cctx->rctx;
- cctx->next = rctx->cctx;
- cctx->prev = &rctx->cctx;
- rctx->cctx = cctx;
- if (cctx->next)
cctx->next->prev = &cctx->next;
use DLIST_* macros here ^^ and elswhere, I do not want to see custom linked list manipulation unless unavoidable (and in that case I want to see a comment that explains why it is unavoidable.
- start_timer(rctx, NULL);
+}
static int client_destructor(struct cli_ctx *ctx) { if (ctx->cfd > 0) close(ctx->cfd);
style violation, pluse return of close is not checked, check and fire a debug message in case of error.
- if (ctx->prev) {
*ctx->prev = ctx->next;
if (ctx->next)
ctx->next->prev = ctx->prev;
ctx->next = NULL;
ctx->prev = NULL;
- }
use DLIST_*
return 0;
}
@@ -132,6 +205,8 @@ { int ret;
- cctx->active++;
- ret = sss_packet_send(cctx->creq->out, cctx->cfd); if (ret == EAGAIN) { /* not all data was sent, loop again */
@@ -155,6 +230,8 @@ { int ret;
- cctx->active++;
Why these active++ up here, it feels wrong. I haven't checked this throghly but I would think either an accessor function should be used or even just fold this into client_add()/start_timer()
if (!cctx->creq) { cctx->creq = talloc_zero(cctx, struct cli_request); if (!cctx->creq) {
@@ -298,6 +375,8 @@ cctx->ev = ev; cctx->rctx = rctx;
client_add(cctx);
talloc_set_destructor(cctx, client_destructor);
DEBUG(4, ("Client connected to privileged pipe!\n"));
@@ -356,6 +435,8 @@ cctx->ev = ev; cctx->rctx = rctx;
client_add(cctx);
talloc_set_destructor(cctx, client_destructor);
DEBUG(4, ("Client connected!\n"));
@@ -585,24 +666,27 @@ return EIO; }
-int sss_process_init(TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
struct confdb_ctx *cdb,
struct sss_cmd_table sss_cmds[],
const char *sss_pipe_name,
const char *sss_priv_pipe_name,
const char *confdb_service_path,
const char *svc_name,
uint16_t svc_version,
struct sbus_interface *monitor_intf,
const char *cli_name,
struct sbus_interface *dp_intf,
struct resp_ctx **responder_ctx)
+int sss_process_init_timeout(TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
struct confdb_ctx *cdb,
struct sss_cmd_table sss_cmds[],
const char *sss_pipe_name,
const char *sss_priv_pipe_name,
const char *confdb_service_path,
const char *svc_name,
uint16_t svc_version,
struct sbus_interface *monitor_intf,
const char *cli_name,
struct sbus_interface *dp_intf,
struct resp_ctx **responder_ctx,
time_t idle_timeout)
{ struct resp_ctx *rctx; struct sss_domain_info *dom; int ret;
- DEBUG(2, ("connection idle timeout %d\n", idle_timeout));
- rctx = talloc_zero(mem_ctx, struct resp_ctx); if (!rctx) { DEBUG(0, ("fatal error initializing resp_ctx\n"));
@@ -614,6 +698,7 @@ rctx->sock_name = sss_pipe_name; rctx->priv_sock_name = sss_priv_pipe_name; rctx->confdb_service_path = confdb_service_path;
rctx->idle_timeout = idle_timeout;
ret = confdb_get_domains(rctx->cdb, &rctx->domains); if (ret != EOK) {
@@ -666,6 +751,27 @@ return EOK; }
+int sss_process_init(TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
struct confdb_ctx *cdb,
struct sss_cmd_table sss_cmds[],
const char *sss_pipe_name,
const char *sss_priv_pipe_name,
const char *confdb_service_path,
const char *svc_name,
uint16_t svc_version,
struct sbus_interface *monitor_intf,
const char *cli_name,
struct sbus_interface *dp_intf,
struct resp_ctx **responder_ctx)
+{
- return sss_process_init_timeout(mem_ctx, ev, cdb, sss_cmds,
sss_pipe_name, sss_priv_pipe_name,
confdb_service_path, svc_name,
svc_version, monitor_intf,
cli_name,
dp_intf, responder_ctx, 0);
+}
Why not simply add the timeval and keep the original name ? In what case do we want to idle out only for nss stuff ? I think we always want to idle out for any client connection, whtehr it is for nss, pam, ssh, sudo, etc...
int sss_dp_get_domain_conn(struct resp_ctx *rctx, const char *domain, struct be_conn **_conn) { --- .gen-patch.27968/src/responder/common/responder.h 2012-04-20 14:01:34.478465251 -0400 +++ sssd-1.5.1/src/responder/common/responder.h 2012-04-20 14:01:45.070779708 -0400 @@ -90,6 +90,11 @@ struct sss_names_ctx *names;
void *pvt_ctx;
- struct cli_ctx *cctx;
- time_t idle_timeout;
- struct tevent_timer *timer;
};
/* Needed for the NSS responder */ @@ -120,6 +125,11 @@ int netgrent_cur;
time_t pam_timeout;
- int active;
- struct cli_ctx *next;
- struct cli_ctx **prev;
};
struct sss_cmd_table { @@ -142,6 +152,21 @@ struct sbus_interface *dp_intf, struct resp_ctx **responder_ctx);
+int sss_process_init_timeout(TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
struct confdb_ctx *cdb,
struct sss_cmd_table sss_cmds[],
const char *sss_pipe_name,
const char *sss_priv_pipe_name,
const char *confdb_service_path,
const char *svc_name,
uint16_t svc_version,
struct sbus_interface *monitor_intf,
const char *cli_name,
struct sbus_interface *dp_intf,
struct resp_ctx **responder_ctx,
time_t idle_timeout);
int sss_parse_name(TALLOC_CTX *memctx, struct sss_names_ctx *snctx, const char *orig, char **domain, char **name); --- .gen-patch.27968/src/responder/nss/nsssrv.c 2012-04-20 14:01:34.478465251 -0400 +++ sssd-1.5.1/src/responder/nss/nsssrv.c 2012-04-20 14:01:45.070779708 -0400 @@ -258,6 +258,7 @@ struct nss_ctx *nctx; int ret, max_retries; int hret;
int idle_timeout;
nctx = talloc_zero(mem_ctx, struct nss_ctx); if (!nctx) {
@@ -273,15 +274,22 @@
nss_cmds = get_nss_cmds();
- ret = sss_process_init(nctx, ev, cdb,
nss_cmds,
SSS_NSS_SOCKET_NAME, NULL,
CONFDB_NSS_CONF_ENTRY,
NSS_SBUS_SERVICE_NAME,
NSS_SBUS_SERVICE_VERSION,
&monitor_nss_interface,
"NSS", &nss_dp_interface,
&nctx->rctx);
- ret = confdb_get_int(cdb, nctx, CONFDB_NSS_CONF_ENTRY,
CONFDB_NSS_IDLE_TIMEOUT, 0, &idle_timeout);
- if (ret != EOK) {
DEBUG(0, ("fatal error getting nss config\n"));
use new debug codes ^^ (also above and below elsewhere).
return ret;
- }
- ret = sss_process_init_timeout(nctx, ev, cdb,
nss_cmds,
SSS_NSS_SOCKET_NAME, NULL,
CONFDB_NSS_CONF_ENTRY,
NSS_SBUS_SERVICE_NAME,
NSS_SBUS_SERVICE_VERSION,
&monitor_nss_interface,
"NSS", &nss_dp_interface,
if (ret != EOK) { return ret; }&nctx->rctx, idle_timeout);
--- .gen-patch.27968/src/sss_client/common.c 2012-04-20 14:01:34.479465187 -0400 +++ sssd-1.5.1/src/sss_client/common.c 2012-04-20 14:01:51.388370756 -0400 @@ -38,6 +38,7 @@ #include <stdbool.h> #include <stdint.h> #include <string.h> +#include <signal.h> #include <fcntl.h> #include <poll.h>
@@ -154,8 +155,8 @@ }
/* Write failed */
sss_cli_close_socket(); *errnop = errno;
sss_cli_close_socket(); return NSS_STATUS_UNAVAIL; }
@@ -264,8 +265,8 @@ * since the transaction has failed half way * through. */
sss_cli_close_socket(); *errnop = errno;
sss_cli_close_socket(); ret = NSS_STATUS_UNAVAIL; goto failed; }
The above 2 sound like some sort of bugfix, so they shouldn't be merged in the same patch, they need a separate patch with a good explanation in the commit message.
@@ -363,12 +364,11 @@
- 0-3: 32bit unsigned version number
*/
-static int sss_nss_check_version(const char *socket_name) +static int sss_nss_check_version(const char *socket_name, int *errnop) { uint8_t *repbuf; size_t replen; enum nss_status nret;
- int errnop; int res = NSS_STATUS_UNAVAIL; uint32_t expected_version; struct sss_cli_req_data req;
@@ -379,6 +379,7 @@ strcmp(socket_name, SSS_PAM_PRIV_SOCKET_NAME) == 0) { expected_version = SSS_PAM_PROTOCOL_VERSION; } else {
}*errnop = EFAULT; return NSS_STATUS_UNAVAIL;
@@ -386,20 +387,20 @@ req.data = &expected_version;
nret = sss_nss_make_request_nochecks(SSS_GET_VERSION, &req,
&repbuf, &replen, &errnop);
if (nret != NSS_STATUS_SUCCESS) { return nret; }&repbuf, &replen, errnop);
- if (!repbuf) {
return res;
- }
- if (((uint32_t *)repbuf)[0] == expected_version) {
- if (repbuf && ((uint32_t *)repbuf)[0] == expected_version) {
*errnop = 0; res = NSS_STATUS_SUCCESS;
- }
- } else
*errnop = EFAULT;
style
- if (repbuf)
free(repbuf);
useless change, do not do it! free() does not need guards against NULL
- free(repbuf); return res;
}
@@ -670,12 +671,11 @@
sss_cli_sd = mysd;
+- if (sss_nss_check_version(socket_name) == NSS_STATUS_SUCCESS) {
- if (sss_nss_check_version(socket_name, errnop) ==
NSS_STATUS_SUCCESS) {
Line wrapping, plus if you are changin this line then split it in ret = foo() if (ret == bar) {
return SSS_STATUS_SUCCESS; } sss_cli_close_socket();
- *errnop = EFAULT; return SSS_STATUS_UNAVAIL;
}
@@ -686,8 +686,10 @@ uint8_t **repbuf, size_t *replen, int *errnop) {
- enum sss_status ret; char *envval;
int retry, restore;
struct sigaction sa, osa;
enum nss_status ret = NSS_STATUS_UNAVAIL;
/* avoid looping in the nss daemon */ envval = getenv("_SSS_LOOPS");
@@ -695,12 +697,32 @@ return NSS_STATUS_NOTFOUND; }
- ret = sss_cli_check_socket(errnop, SSS_NSS_SOCKET_NAME);
- if (ret != SSS_STATUS_SUCCESS) {
return NSS_STATUS_UNAVAIL;
- /*
* Block off SIGPIPE and retry the command
* in case the other end closes the pipe on us.
*/
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = 0;
- sa.sa_handler = SIG_IGN;
- if (sigaction(SIGPIPE, &sa, &osa) == 0) {
restore = 1;
retry = 2;
- } else {
restore = 0;
}retry = 1;
NACK NACK NACK
We do not mess with application signals in a nsswitch library absolutely unacceptable.
- do {
if (sss_cli_check_socket(errnop,
SSS_NSS_SOCKET_NAME) ==
SSS_STATUS_SUCCESS)
ret = sss_nss_make_request_nochecks(cmd, rd, repbuf,
replen, errnop);
- } while (ret == NSS_STATUS_UNAVAIL
&& (*errnop == EPIPE || *errnop == 0)
&& --retry > 0);
- if (restore)
sigaction(SIGPIPE, &osa, NULL);
- return sss_nss_make_request_nochecks(cmd, rd, repbuf, replen,
errnop);
- return ret;
}
errno_t check_server_cred(int sockfd)
It's a NACK.
Simo.
On 05/30/2012 09:08 AM, Simo Sorce wrote:
On Wed, 2012-05-30 at 08:23 -0400, Stephen Gallagher wrote:
This patch was submitted by Shantanu Goel in a Trac ticket. Sending to the list for review. I haven't yet had a chance to dig into it myself yet, but the concept as described in https://fedorahosted.org/sssd/ticket/1354 is sound, so I'd like to see this get cleaned up and included.
Seems like a big effort. Thank you for the patch! We would gladly accept it when the style, comment and functional concerns are addressed. Please do not be offended, it is just the process we rigorously follow. We would absolutely delighted to get more contributions in future!
Thanks Dmitri
Comments inline, but I can already say I want more comments in the code, there are none.
differences between files attachment (sssd-1.5.1-expire-idle-connection.diff)
--- .gen-patch.27968/src/confdb/confdb.h 2012-04-20 14:01:34.477465315 -0400 +++ sssd-1.5.1/src/confdb/confdb.h 2012-04-20 14:01:45.069779773 -0400 @@ -78,6 +78,7 @@ #define CONFDB_NSS_VETOED_SHELL "vetoed_shells" #define CONFDB_NSS_ALLOWED_SHELL "allowed_shells" #define CONFDB_NSS_SHELL_FALLBACK "shell_fallback" +#define CONFDB_NSS_IDLE_TIMEOUT "idle_timeout"
/* PAM */ #define CONFDB_PAM_CONF_ENTRY "config/pam" --- .gen-patch.27968/src/responder/common/responder_common.c 2012-04-20 14:01:34.478465251 -0400 +++ sssd-1.5.1/src/responder/common/responder_common.c 2012-04-20 14:01:45.070779708 -0400 @@ -88,9 +88,82 @@ return EOK; }
+static void idle_handler(struct tevent_context *,
struct tevent_timer *,
struct timeval,
void *);
+static void start_timer(struct resp_ctx *rctx, struct timeval *tvp) +{
- struct timeval tv;
- if (rctx->idle_timeout <= 0)
return;
- if (rctx->timer)
return;
- if (!rctx->cctx)
return;
Style guidelines violations ^^ We use: if (x) { blah }
- if (!tvp) {
tvp = &tv;
gettimeofday(tvp, NULL);
- }
Please use tevent_timeval_current() here ^
- tvp->tv_sec += rctx->idle_timeout;
- rctx->timer = tevent_add_timer(rctx->cctx->ev, rctx,
*tvp, idle_handler, rctx);
- if (!rctx->timer)
DEBUG(0, ("could not start idle timer\n"));
+}
+static void idle_handler(struct tevent_context *ev,
struct tevent_timer *te,
struct timeval current_time,
void *data)
+{
- struct resp_ctx *rctx = data;
- struct cli_ctx *cctx = rctx->cctx;
- int n = 0;
- while (cctx) {
struct cli_ctx *cctx_next = cctx->next;
if (cctx->active || cctx->creq)
cctx->active = 0;
else if (talloc_free(cctx) == 0)
n++;
sever style violation ^^ as well, plus I want a comment on top that explain why we are doing an either or.
Also what's the point of this check on talloc_free() ? It is not like we have any options left if it fails.
Also please use a for loop and a continue after the if (), will be much more readable.
cctx = cctx_next;
- }
- if (n)
DEBUG(2, ("terminated %d idle connecions\n", n));
braces ^^
- /*
* The timer event is freed by the library.
*/
- rctx->timer = NULL;
- start_timer(rctx, ¤t_time);
+}
+static void client_add(struct cli_ctx *cctx) +{
- struct resp_ctx *rctx = cctx->rctx;
- cctx->next = rctx->cctx;
- cctx->prev = &rctx->cctx;
- rctx->cctx = cctx;
- if (cctx->next)
cctx->next->prev = &cctx->next;
use DLIST_* macros here ^^ and elswhere, I do not want to see custom linked list manipulation unless unavoidable (and in that case I want to see a comment that explains why it is unavoidable.
- start_timer(rctx, NULL);
+}
static int client_destructor(struct cli_ctx *ctx) { if (ctx->cfd > 0) close(ctx->cfd);
style violation, pluse return of close is not checked, check and fire a debug message in case of error.
- if (ctx->prev) {
*ctx->prev = ctx->next;
if (ctx->next)
ctx->next->prev = ctx->prev;
ctx->next = NULL;
ctx->prev = NULL;
- }
use DLIST_*
return 0;
}
@@ -132,6 +205,8 @@ { int ret;
- cctx->active++;
- ret = sss_packet_send(cctx->creq->out, cctx->cfd); if (ret == EAGAIN) { /* not all data was sent, loop again */
@@ -155,6 +230,8 @@ { int ret;
- cctx->active++;
Why these active++ up here, it feels wrong. I haven't checked this throghly but I would think either an accessor function should be used or even just fold this into client_add()/start_timer()
if (!cctx->creq) { cctx->creq = talloc_zero(cctx, struct cli_request); if (!cctx->creq) {
@@ -298,6 +375,8 @@ cctx->ev = ev; cctx->rctx = rctx;
client_add(cctx);
talloc_set_destructor(cctx, client_destructor);
DEBUG(4, ("Client connected to privileged pipe!\n"));
@@ -356,6 +435,8 @@ cctx->ev = ev; cctx->rctx = rctx;
client_add(cctx);
talloc_set_destructor(cctx, client_destructor);
DEBUG(4, ("Client connected!\n"));
@@ -585,24 +666,27 @@ return EIO; }
-int sss_process_init(TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
struct confdb_ctx *cdb,
struct sss_cmd_table sss_cmds[],
const char *sss_pipe_name,
const char *sss_priv_pipe_name,
const char *confdb_service_path,
const char *svc_name,
uint16_t svc_version,
struct sbus_interface *monitor_intf,
const char *cli_name,
struct sbus_interface *dp_intf,
struct resp_ctx **responder_ctx)
+int sss_process_init_timeout(TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
struct confdb_ctx *cdb,
struct sss_cmd_table sss_cmds[],
const char *sss_pipe_name,
const char *sss_priv_pipe_name,
const char *confdb_service_path,
const char *svc_name,
uint16_t svc_version,
struct sbus_interface *monitor_intf,
const char *cli_name,
struct sbus_interface *dp_intf,
struct resp_ctx **responder_ctx,
time_t idle_timeout)
{ struct resp_ctx *rctx; struct sss_domain_info *dom; int ret;
- DEBUG(2, ("connection idle timeout %d\n", idle_timeout));
- rctx = talloc_zero(mem_ctx, struct resp_ctx); if (!rctx) { DEBUG(0, ("fatal error initializing resp_ctx\n"));
@@ -614,6 +698,7 @@ rctx->sock_name = sss_pipe_name; rctx->priv_sock_name = sss_priv_pipe_name; rctx->confdb_service_path = confdb_service_path;
rctx->idle_timeout = idle_timeout;
ret = confdb_get_domains(rctx->cdb, &rctx->domains); if (ret != EOK) {
@@ -666,6 +751,27 @@ return EOK; }
+int sss_process_init(TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
struct confdb_ctx *cdb,
struct sss_cmd_table sss_cmds[],
const char *sss_pipe_name,
const char *sss_priv_pipe_name,
const char *confdb_service_path,
const char *svc_name,
uint16_t svc_version,
struct sbus_interface *monitor_intf,
const char *cli_name,
struct sbus_interface *dp_intf,
struct resp_ctx **responder_ctx)
+{
- return sss_process_init_timeout(mem_ctx, ev, cdb, sss_cmds,
sss_pipe_name, sss_priv_pipe_name,
confdb_service_path, svc_name,
svc_version, monitor_intf,
cli_name,
dp_intf, responder_ctx, 0);
+}
Why not simply add the timeval and keep the original name ? In what case do we want to idle out only for nss stuff ? I think we always want to idle out for any client connection, whtehr it is for nss, pam, ssh, sudo, etc...
int sss_dp_get_domain_conn(struct resp_ctx *rctx, const char *domain, struct be_conn **_conn) { --- .gen-patch.27968/src/responder/common/responder.h 2012-04-20 14:01:34.478465251 -0400 +++ sssd-1.5.1/src/responder/common/responder.h 2012-04-20 14:01:45.070779708 -0400 @@ -90,6 +90,11 @@ struct sss_names_ctx *names;
void *pvt_ctx;
- struct cli_ctx *cctx;
- time_t idle_timeout;
- struct tevent_timer *timer;
};
/* Needed for the NSS responder */ @@ -120,6 +125,11 @@ int netgrent_cur;
time_t pam_timeout;
- int active;
- struct cli_ctx *next;
- struct cli_ctx **prev;
};
struct sss_cmd_table { @@ -142,6 +152,21 @@ struct sbus_interface *dp_intf, struct resp_ctx **responder_ctx);
+int sss_process_init_timeout(TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
struct confdb_ctx *cdb,
struct sss_cmd_table sss_cmds[],
const char *sss_pipe_name,
const char *sss_priv_pipe_name,
const char *confdb_service_path,
const char *svc_name,
uint16_t svc_version,
struct sbus_interface *monitor_intf,
const char *cli_name,
struct sbus_interface *dp_intf,
struct resp_ctx **responder_ctx,
time_t idle_timeout);
int sss_parse_name(TALLOC_CTX *memctx, struct sss_names_ctx *snctx, const char *orig, char **domain, char **name); --- .gen-patch.27968/src/responder/nss/nsssrv.c 2012-04-20 14:01:34.478465251 -0400 +++ sssd-1.5.1/src/responder/nss/nsssrv.c 2012-04-20 14:01:45.070779708 -0400 @@ -258,6 +258,7 @@ struct nss_ctx *nctx; int ret, max_retries; int hret;
int idle_timeout;
nctx = talloc_zero(mem_ctx, struct nss_ctx); if (!nctx) {
@@ -273,15 +274,22 @@
nss_cmds = get_nss_cmds();
- ret = sss_process_init(nctx, ev, cdb,
nss_cmds,
SSS_NSS_SOCKET_NAME, NULL,
CONFDB_NSS_CONF_ENTRY,
NSS_SBUS_SERVICE_NAME,
NSS_SBUS_SERVICE_VERSION,
&monitor_nss_interface,
"NSS", &nss_dp_interface,
&nctx->rctx);
- ret = confdb_get_int(cdb, nctx, CONFDB_NSS_CONF_ENTRY,
CONFDB_NSS_IDLE_TIMEOUT, 0, &idle_timeout);
- if (ret != EOK) {
DEBUG(0, ("fatal error getting nss config\n"));
use new debug codes ^^ (also above and below elsewhere).
return ret;
- }
- ret = sss_process_init_timeout(nctx, ev, cdb,
nss_cmds,
SSS_NSS_SOCKET_NAME, NULL,
CONFDB_NSS_CONF_ENTRY,
NSS_SBUS_SERVICE_NAME,
NSS_SBUS_SERVICE_VERSION,
&monitor_nss_interface,
"NSS", &nss_dp_interface,
if (ret != EOK) { return ret; }&nctx->rctx, idle_timeout);
--- .gen-patch.27968/src/sss_client/common.c 2012-04-20 14:01:34.479465187 -0400 +++ sssd-1.5.1/src/sss_client/common.c 2012-04-20 14:01:51.388370756 -0400 @@ -38,6 +38,7 @@ #include <stdbool.h> #include <stdint.h> #include <string.h> +#include <signal.h> #include <fcntl.h> #include <poll.h>
@@ -154,8 +155,8 @@ }
/* Write failed */
sss_cli_close_socket(); *errnop = errno;
sss_cli_close_socket(); return NSS_STATUS_UNAVAIL; }
@@ -264,8 +265,8 @@ * since the transaction has failed half way * through. */
sss_cli_close_socket(); *errnop = errno;
sss_cli_close_socket(); ret = NSS_STATUS_UNAVAIL; goto failed; }
The above 2 sound like some sort of bugfix, so they shouldn't be merged in the same patch, they need a separate patch with a good explanation in the commit message.
@@ -363,12 +364,11 @@
- 0-3: 32bit unsigned version number
*/
-static int sss_nss_check_version(const char *socket_name) +static int sss_nss_check_version(const char *socket_name, int *errnop) { uint8_t *repbuf; size_t replen; enum nss_status nret;
- int errnop; int res = NSS_STATUS_UNAVAIL; uint32_t expected_version; struct sss_cli_req_data req;
@@ -379,6 +379,7 @@ strcmp(socket_name, SSS_PAM_PRIV_SOCKET_NAME) == 0) { expected_version = SSS_PAM_PROTOCOL_VERSION; } else {
}*errnop = EFAULT; return NSS_STATUS_UNAVAIL;
@@ -386,20 +387,20 @@ req.data = &expected_version;
nret = sss_nss_make_request_nochecks(SSS_GET_VERSION, &req,
&repbuf, &replen, &errnop);
if (nret != NSS_STATUS_SUCCESS) { return nret; }&repbuf, &replen, errnop);
- if (!repbuf) {
return res;
- }
- if (((uint32_t *)repbuf)[0] == expected_version) {
- if (repbuf && ((uint32_t *)repbuf)[0] == expected_version) {
*errnop = 0; res = NSS_STATUS_SUCCESS;
- }
- } else
*errnop = EFAULT;
style
- if (repbuf)
free(repbuf);
useless change, do not do it! free() does not need guards against NULL
- free(repbuf); return res;
}
@@ -670,12 +671,11 @@
sss_cli_sd = mysd;
+- if (sss_nss_check_version(socket_name) == NSS_STATUS_SUCCESS) {
- if (sss_nss_check_version(socket_name, errnop) ==
NSS_STATUS_SUCCESS) {
Line wrapping, plus if you are changin this line then split it in ret = foo() if (ret == bar) {
return SSS_STATUS_SUCCESS; } sss_cli_close_socket();
- *errnop = EFAULT; return SSS_STATUS_UNAVAIL;
}
@@ -686,8 +686,10 @@ uint8_t **repbuf, size_t *replen, int *errnop) {
- enum sss_status ret; char *envval;
int retry, restore;
struct sigaction sa, osa;
enum nss_status ret = NSS_STATUS_UNAVAIL;
/* avoid looping in the nss daemon */ envval = getenv("_SSS_LOOPS");
@@ -695,12 +697,32 @@ return NSS_STATUS_NOTFOUND; }
- ret = sss_cli_check_socket(errnop, SSS_NSS_SOCKET_NAME);
- if (ret != SSS_STATUS_SUCCESS) {
return NSS_STATUS_UNAVAIL;
- /*
* Block off SIGPIPE and retry the command
* in case the other end closes the pipe on us.
*/
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = 0;
- sa.sa_handler = SIG_IGN;
- if (sigaction(SIGPIPE, &sa, &osa) == 0) {
restore = 1;
retry = 2;
- } else {
restore = 0;
}retry = 1;
NACK NACK NACK
We do not mess with application signals in a nsswitch library absolutely unacceptable.
- do {
if (sss_cli_check_socket(errnop,
SSS_NSS_SOCKET_NAME) ==
SSS_STATUS_SUCCESS)
ret = sss_nss_make_request_nochecks(cmd, rd, repbuf,
replen, errnop);
- } while (ret == NSS_STATUS_UNAVAIL
&& (*errnop == EPIPE || *errnop == 0)
&& --retry > 0);
- if (restore)
sigaction(SIGPIPE, &osa, NULL);
- return sss_nss_make_request_nochecks(cmd, rd, repbuf, replen,
errnop);
- return ret;
}
errno_t check_server_cred(int sockfd)
It's a NACK.
Simo.
On Wed, 2012-05-30 at 16:27 -0400, Dmitri Pal wrote:
On 05/30/2012 09:08 AM, Simo Sorce wrote:
On Wed, 2012-05-30 at 08:23 -0400, Stephen Gallagher wrote:
This patch was submitted by Shantanu Goel in a Trac ticket. Sending to the list for review. I haven't yet had a chance to dig into it myself yet, but the concept as described in https://fedorahosted.org/sssd/ticket/1354 is sound, so I'd like to see this get cleaned up and included.
Seems like a big effort. Thank you for the patch! We would gladly accept it when the style, comment and functional concerns are addressed. Please do not be offended, it is just the process we rigorously follow. We would absolutely delighted to get more contributions in future!
The original submitter provided new patches attached to https://fedorahosted.org/sssd/ticket/1354 which I have attached for review. I haven't yet reviewed them myself.
On Fri, 2012-06-15 at 15:39 -0400, Stephen Gallagher wrote:
On Wed, 2012-05-30 at 16:27 -0400, Dmitri Pal wrote:
On 05/30/2012 09:08 AM, Simo Sorce wrote:
On Wed, 2012-05-30 at 08:23 -0400, Stephen Gallagher wrote:
This patch was submitted by Shantanu Goel in a Trac ticket. Sending to the list for review. I haven't yet had a chance to dig into it myself yet, but the concept as described in https://fedorahosted.org/sssd/ticket/1354 is sound, so I'd like to see this get cleaned up and included.
Seems like a big effort. Thank you for the patch! We would gladly accept it when the style, comment and functional concerns are addressed. Please do not be offended, it is just the process we rigorously follow. We would absolutely delighted to get more contributions in future!
The original submitter provided new patches attached to https://fedorahosted.org/sssd/ticket/1354 which I have attached for review. I haven't yet reviewed them myself.
I started doing the review today.
Short version: I think the approach here is wrong. My initial glance over this code a few weeks ago didn't catch the fundamental problem with it.
It's needlessly complex to run a single polling timer to catch idle timeouts. A simpler and easier to follow approach would be to just set a timer during accept_fd_handler() and reset it each time client_fd_handler() is set. If the timer ever fires, we know that this one, specific client has become idle, and we should then free the client context.
Any specialized disconnection logic that is needed should be handled in the client context destructor. In short, I think you've added a lot of overkill to the problem here.
Also, the use of send() and MSG_NOSIGNAL is Linux-specific, so we're going to need to add a configure check and conditionalize it. It's a worthwhile enhancement (and belongs in a separate patch), but we want to limit (where possible) the number of places that would break compilation on non-Linux systems.
Finally, please do not use tabs in your patches. The coding style for SSSD uses four spaces for indentation.
I'm going to take your patch and rework it, because it's become very important to get this in ASAP (we're having resource-exhaustion problems in real-world deployments) and I think it's necessary to get this in immediately.
I'll make the necessary modifications myself, but I'm still going to give the patch attribution to you, Shantanu. You've definitely saved me a lot of time and effort by investigating all the gotcha points (such as the MSG_NOSIGNAL piece).
Thank you very much for your contribution Shantanu. It's a great help and we hope you'll submit more patches in the future!
Hi Stephen,
Please feel free to modify the patch in any way or shape you deem necessary for inclusion. We are just glad that you agree there is a real problem which needs fixing. One thing I ask is if you expect to have rhel 5 or 6 test RPMs that we could test with the ultimate fix any time soon, please drop me a note and we will gladly install them on some of our problematic machines here to see if they address the problems we have seen.
Regards, Shantanu
________________________________ From: Stephen Gallagher sgallagh@redhat.com To: Development of the System Security Services Daemon sssd-devel@lists.fedorahosted.org Cc: dpal@redhat.com; Shantanu Goel sgoel01@yahoo.com Sent: Monday, June 18, 2012 9:11 AM Subject: Re: [SSSD] [PATCH] Add support for terminating idle connections in sssd_nss
On Fri, 2012-06-15 at 15:39 -0400, Stephen Gallagher wrote:
On Wed, 2012-05-30 at 16:27 -0400, Dmitri Pal wrote:
On 05/30/2012 09:08 AM, Simo Sorce wrote:
On Wed, 2012-05-30 at 08:23 -0400, Stephen Gallagher wrote:
This patch was submitted by Shantanu Goel in a Trac ticket. Sending to the list for review. I haven't yet had a chance to dig into it myself yet, but the concept as described in https://fedorahosted.org/sssd/ticket/1354 is sound, so I'd like to see this get cleaned up and included.
Seems like a big effort. Thank you for the patch! We would gladly accept it when the style, comment and functional concerns are addressed. Please do not be offended, it is just the process we rigorously follow. We would absolutely delighted to get more contributions in future!
The original submitter provided new patches attached to https://fedorahosted.org/sssd/ticket/1354 which I have attached for review. I haven't yet reviewed them myself.
I started doing the review today.
Short version: I think the approach here is wrong. My initial glance over this code a few weeks ago didn't catch the fundamental problem with it.
It's needlessly complex to run a single polling timer to catch idle timeouts. A simpler and easier to follow approach would be to just set a timer during accept_fd_handler() and reset it each time client_fd_handler() is set. If the timer ever fires, we know that this one, specific client has become idle, and we should then free the client context.
Any specialized disconnection logic that is needed should be handled in the client context destructor. In short, I think you've added a lot of overkill to the problem here.
Also, the use of send() and MSG_NOSIGNAL is Linux-specific, so we're going to need to add a configure check and conditionalize it. It's a worthwhile enhancement (and belongs in a separate patch), but we want to limit (where possible) the number of places that would break compilation on non-Linux systems.
Finally, please do not use tabs in your patches. The coding style for SSSD uses four spaces for indentation.
I'm going to take your patch and rework it, because it's become very important to get this in ASAP (we're having resource-exhaustion problems in real-world deployments) and I think it's necessary to get this in immediately.
I'll make the necessary modifications myself, but I'm still going to give the patch attribution to you, Shantanu. You've definitely saved me a lot of time and effort by investigating all the gotcha points (such as the MSG_NOSIGNAL piece).
Thank you very much for your contribution Shantanu. It's a great help and we hope you'll submit more patches in the future!
On Mon, 2012-06-18 at 06:30 -0700, Shantanu Goel wrote:
Hi Stephen,
Please feel free to modify the patch in any way or shape you deem necessary for inclusion. We are just glad that you agree there is a real problem which needs fixing. One thing I ask is if you expect to have rhel 5 or 6 test RPMs that we could test with the ultimate fix any time soon, please drop me a note and we will gladly install them on some of our problematic machines here to see if they address the problems we have seen.
Sure, once this is done I'm going to be committing it upstream for the master branch (future 1.9), the sssd-1-8 branch (our current LTM release) and the sssd-1-5 branch (our previous LTM release).
You should be able to pull the patches from the sssd-1-5 branch and build them for your systems once they're ready.
On Mon, 2012-06-18 at 09:33 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 06:30 -0700, Shantanu Goel wrote:
Hi Stephen,
Please feel free to modify the patch in any way or shape you deem necessary for inclusion. We are just glad that you agree there is a real problem which needs fixing. One thing I ask is if you expect to have rhel 5 or 6 test RPMs that we could test with the ultimate fix any time soon, please drop me a note and we will gladly install them on some of our problematic machines here to see if they address the problems we have seen.
Sure, once this is done I'm going to be committing it upstream for the master branch (future 1.9), the sssd-1-8 branch (our current LTM release) and the sssd-1-5 branch (our previous LTM release).
You should be able to pull the patches from the sssd-1-5 branch and build them for your systems once they're ready.
Ok, new patches attached. Shantanu, these are currently designed for the master branch. We'll get them committed there first and tested out for a little while, then we'll backport them.
Patch 0001: Return the correct errno value. Previously it could have been reset by closing the socket.
Patch 0002: Add some additional debugging to the client_destructor()
Patch 0003: On systems that support MSG_NOSIGNAL, we should use it. This way, if a client app isn't configured to listen for SIGPIPE, it will not crash.
Patch 0004: Add a timer to each client context. If sixty seconds pass (configurable in the patch 0005) without either read or write activity, we will free the client context and close the socket. The client code is already written to be tolerant of this and will reconnect on the next request. This will help us avoid resource exhaustion if we have clients that hang on to NSS and PAM file descriptors indefinitely (like 'su' and 'login' do for PAM).
Patch 0005: Make the client idle timeout value configurable and add it to the manpages and config API.
Thanks Stephen, we will give the patches a try here and report back how it looks.
Regards, Shantanu
________________________________ From: Stephen Gallagher sgallagh@redhat.com To: Development of the System Security Services Daemon sssd-devel@lists.fedorahosted.org Cc: Shantanu Goel sgoel01@yahoo.com Sent: Monday, June 18, 2012 11:33 AM Subject: Re: [SSSD] [PATCH] Add support for terminating idle connections in sssd_nss
On Mon, 2012-06-18 at 09:33 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 06:30 -0700, Shantanu Goel wrote:
Hi Stephen,
Please feel free to modify the patch in any way or shape you deem necessary for inclusion. We are just glad that you agree there is a real problem which needs fixing. One thing I ask is if you expect to have rhel 5 or 6 test RPMs that we could test with the ultimate fix any time soon, please drop me a note and we will gladly install them on some of our problematic machines here to see if they address the problems we have seen.
Sure, once this is done I'm going to be committing it upstream for the master branch (future 1.9), the sssd-1-8 branch (our current LTM release) and the sssd-1-5 branch (our previous LTM release).
You should be able to pull the patches from the sssd-1-5 branch and build them for your systems once they're ready.
Ok, new patches attached. Shantanu, these are currently designed for the master branch. We'll get them committed there first and tested out for a little while, then we'll backport them.
Patch 0001: Return the correct errno value. Previously it could have been reset by closing the socket.
Patch 0002: Add some additional debugging to the client_destructor()
Patch 0003: On systems that support MSG_NOSIGNAL, we should use it. This way, if a client app isn't configured to listen for SIGPIPE, it will not crash.
Patch 0004: Add a timer to each client context. If sixty seconds pass (configurable in the patch 0005) without either read or write activity, we will free the client context and close the socket. The client code is already written to be tolerant of this and will reconnect on the next request. This will help us avoid resource exhaustion if we have clients that hang on to NSS and PAM file descriptors indefinitely (like 'su' and 'login' do for PAM).
Patch 0005: Make the client idle timeout value configurable and add it to the manpages and config API.
On Mon, 2012-06-18 at 11:33 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 09:33 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 06:30 -0700, Shantanu Goel wrote:
Hi Stephen,
Please feel free to modify the patch in any way or shape you deem necessary for inclusion. We are just glad that you agree there is a real problem which needs fixing. One thing I ask is if you expect to have rhel 5 or 6 test RPMs that we could test with the ultimate fix any time soon, please drop me a note and we will gladly install them on some of our problematic machines here to see if they address the problems we have seen.
Sure, once this is done I'm going to be committing it upstream for the master branch (future 1.9), the sssd-1-8 branch (our current LTM release) and the sssd-1-5 branch (our previous LTM release).
You should be able to pull the patches from the sssd-1-5 branch and build them for your systems once they're ready.
Ok, new patches attached. Shantanu, these are currently designed for the master branch. We'll get them committed there first and tested out for a little while, then we'll backport them.
Patch 0001: Return the correct errno value. Previously it could have been reset by closing the socket.
Patch 0002: Add some additional debugging to the client_destructor()
Patch 0003: On systems that support MSG_NOSIGNAL, we should use it. This way, if a client app isn't configured to listen for SIGPIPE, it will not crash.
Patch 0004: Add a timer to each client context. If sixty seconds pass (configurable in the patch 0005) without either read or write activity, we will free the client context and close the socket. The client code is already written to be tolerant of this and will reconnect on the next request. This will help us avoid resource exhaustion if we have clients that hang on to NSS and PAM file descriptors indefinitely (like 'su' and 'login' do for PAM).
Patch 0005: Make the client idle timeout value configurable and add it to the manpages and config API.
0001 ack 0002 ack 0003 Please always use send with a default set of flags, make the ifdef set the default set of flags (0 vs MSG_NOSIGNAL) 0004 ack (not like much the TODO but I was told 4/5 got split for reviewability, so ok) 0005 ack
Simo.
On Mon, 2012-06-18 at 13:32 -0400, Simo Sorce wrote:
On Mon, 2012-06-18 at 11:33 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 09:33 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 06:30 -0700, Shantanu Goel wrote:
Hi Stephen,
Please feel free to modify the patch in any way or shape you deem necessary for inclusion. We are just glad that you agree there is a real problem which needs fixing. One thing I ask is if you expect to have rhel 5 or 6 test RPMs that we could test with the ultimate fix any time soon, please drop me a note and we will gladly install them on some of our problematic machines here to see if they address the problems we have seen.
Sure, once this is done I'm going to be committing it upstream for the master branch (future 1.9), the sssd-1-8 branch (our current LTM release) and the sssd-1-5 branch (our previous LTM release).
You should be able to pull the patches from the sssd-1-5 branch and build them for your systems once they're ready.
Ok, new patches attached. Shantanu, these are currently designed for the master branch. We'll get them committed there first and tested out for a little while, then we'll backport them.
Patch 0001: Return the correct errno value. Previously it could have been reset by closing the socket.
Patch 0002: Add some additional debugging to the client_destructor()
Patch 0003: On systems that support MSG_NOSIGNAL, we should use it. This way, if a client app isn't configured to listen for SIGPIPE, it will not crash.
Patch 0004: Add a timer to each client context. If sixty seconds pass (configurable in the patch 0005) without either read or write activity, we will free the client context and close the socket. The client code is already written to be tolerant of this and will reconnect on the next request. This will help us avoid resource exhaustion if we have clients that hang on to NSS and PAM file descriptors indefinitely (like 'su' and 'login' do for PAM).
Patch 0005: Make the client idle timeout value configurable and add it to the manpages and config API.
0001 ack 0002 ack 0003 Please always use send with a default set of flags, make the ifdef set the default set of flags (0 vs MSG_NOSIGNAL) 0004 ack (not like much the TODO but I was told 4/5 got split for reviewability, so ok) 0005 ack
Thanks for the review. New patches attached.
On Mon, 2012-06-18 at 13:49 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 13:32 -0400, Simo Sorce wrote:
On Mon, 2012-06-18 at 11:33 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 09:33 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 06:30 -0700, Shantanu Goel wrote:
Hi Stephen,
Please feel free to modify the patch in any way or shape you deem necessary for inclusion. We are just glad that you agree there is a real problem which needs fixing. One thing I ask is if you expect to have rhel 5 or 6 test RPMs that we could test with the ultimate fix any time soon, please drop me a note and we will gladly install them on some of our problematic machines here to see if they address the problems we have seen.
Sure, once this is done I'm going to be committing it upstream for the master branch (future 1.9), the sssd-1-8 branch (our current LTM release) and the sssd-1-5 branch (our previous LTM release).
You should be able to pull the patches from the sssd-1-5 branch and build them for your systems once they're ready.
Ok, new patches attached. Shantanu, these are currently designed for the master branch. We'll get them committed there first and tested out for a little while, then we'll backport them.
Patch 0001: Return the correct errno value. Previously it could have been reset by closing the socket.
Patch 0002: Add some additional debugging to the client_destructor()
Patch 0003: On systems that support MSG_NOSIGNAL, we should use it. This way, if a client app isn't configured to listen for SIGPIPE, it will not crash.
Patch 0004: Add a timer to each client context. If sixty seconds pass (configurable in the patch 0005) without either read or write activity, we will free the client context and close the socket. The client code is already written to be tolerant of this and will reconnect on the next request. This will help us avoid resource exhaustion if we have clients that hang on to NSS and PAM file descriptors indefinitely (like 'su' and 'login' do for PAM).
Patch 0005: Make the client idle timeout value configurable and add it to the manpages and config API.
0001 ack 0002 ack 0003 Please always use send with a default set of flags, make the ifdef set the default set of flags (0 vs MSG_NOSIGNAL) 0004 ack (not like much the TODO but I was told 4/5 got split for reviewability, so ok) 0005 ack
Thanks for the review. New patches attached.
ack to all.
Simo.
On Mon, 2012-06-18 at 14:11 -0400, Simo Sorce wrote:
On Mon, 2012-06-18 at 13:49 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 13:32 -0400, Simo Sorce wrote:
On Mon, 2012-06-18 at 11:33 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 09:33 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 06:30 -0700, Shantanu Goel wrote:
Hi Stephen,
Please feel free to modify the patch in any way or shape you deem necessary for inclusion. We are just glad that you agree there is a real problem which needs fixing. One thing I ask is if you expect to have rhel 5 or 6 test RPMs that we could test with the ultimate fix any time soon, please drop me a note and we will gladly install them on some of our problematic machines here to see if they address the problems we have seen.
Sure, once this is done I'm going to be committing it upstream for the master branch (future 1.9), the sssd-1-8 branch (our current LTM release) and the sssd-1-5 branch (our previous LTM release).
You should be able to pull the patches from the sssd-1-5 branch and build them for your systems once they're ready.
Ok, new patches attached. Shantanu, these are currently designed for the master branch. We'll get them committed there first and tested out for a little while, then we'll backport them.
Patch 0001: Return the correct errno value. Previously it could have been reset by closing the socket.
Patch 0002: Add some additional debugging to the client_destructor()
Patch 0003: On systems that support MSG_NOSIGNAL, we should use it. This way, if a client app isn't configured to listen for SIGPIPE, it will not crash.
Patch 0004: Add a timer to each client context. If sixty seconds pass (configurable in the patch 0005) without either read or write activity, we will free the client context and close the socket. The client code is already written to be tolerant of this and will reconnect on the next request. This will help us avoid resource exhaustion if we have clients that hang on to NSS and PAM file descriptors indefinitely (like 'su' and 'login' do for PAM).
Patch 0005: Make the client idle timeout value configurable and add it to the manpages and config API.
0001 ack 0002 ack 0003 Please always use send with a default set of flags, make the ifdef set the default set of flags (0 vs MSG_NOSIGNAL) 0004 ack (not like much the TODO but I was told 4/5 got split for reviewability, so ok) 0005 ack
Thanks for the review. New patches attached.
ack to all.
Pushed to master. Also backported to sssd-1-8 and sssd-1-5 and pushed there as well. See attachments for the exact backported patches.
Thanks, we will test the 1.5 patches and report back if we run into any issues.
________________________________ From: Stephen Gallagher sgallagh@redhat.com To: Development of the System Security Services Daemon sssd-devel@lists.fedorahosted.org Cc: Shantanu Goel sgoel01@yahoo.com Sent: Monday, June 18, 2012 2:43 PM Subject: Re: [SSSD] [PATCH] Add support for terminating idle connections in sssd_nss
On Mon, 2012-06-18 at 14:11 -0400, Simo Sorce wrote:
On Mon, 2012-06-18 at 13:49 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 13:32 -0400, Simo Sorce wrote:
On Mon, 2012-06-18 at 11:33 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 09:33 -0400, Stephen Gallagher wrote:
On Mon, 2012-06-18 at 06:30 -0700, Shantanu Goel wrote:
Hi Stephen,
Please feel free to modify the patch in any way or shape you deem necessary for inclusion. We are just glad that you agree there is a real problem which needs fixing. One thing I ask is if you expect to have rhel 5 or 6 test RPMs that we could test with the ultimate fix any time soon, please drop me a note and we will gladly install them on some of our problematic machines here to see if they address the problems we have seen.
Sure, once this is done I'm going to be committing it upstream for the master branch (future 1.9), the sssd-1-8 branch (our current LTM release) and the sssd-1-5 branch (our previous LTM release).
You should be able to pull the patches from the sssd-1-5 branch and build them for your systems once they're ready.
Ok, new patches attached. Shantanu, these are currently designed for the master branch. We'll get them committed there first and tested out for a little while, then we'll backport them.
Patch 0001: Return the correct errno value. Previously it could have been reset by closing the socket.
Patch 0002: Add some additional debugging to the client_destructor()
Patch 0003: On systems that support MSG_NOSIGNAL, we should use it. This way, if a client app isn't configured to listen for SIGPIPE, it will not crash.
Patch 0004: Add a timer to each client context. If sixty seconds pass (configurable in the patch 0005) without either read or write activity, we will free the client context and close the socket. The client code is already written to be tolerant of this and will reconnect on the next request. This will help us avoid resource exhaustion if we have clients that hang on to NSS and PAM file descriptors indefinitely (like 'su' and 'login' do for PAM).
Patch 0005: Make the client idle timeout value configurable and add it to the manpages and config API.
0001 ack 0002 ack 0003 Please always use send with a default set of flags, make the ifdef set the default set of flags (0 vs MSG_NOSIGNAL) 0004 ack (not like much the TODO but I was told 4/5 got split for reviewability, so ok) 0005 ack
Thanks for the review. New patches attached.
ack to all.
Pushed to master. Also backported to sssd-1-8 and sssd-1-5 and pushed there as well. See attachments for the exact backported patches.
sssd-devel@lists.fedorahosted.org