This is a patch for Postfix 2.4 that introduces stress-adaptive behavior. When a "public" network service runs into an "all processes are busy" condition, the master(8) daemon logs a warning, restarts the service, and runs it with "-o stress=yes" on the command line (under normal conditions it runs the service with "-o stress=" on the command line). This feature can be used to make main.cf parameter settings stress-dependent, for example:

/etc/postfix/main.cf:
    smtpd_timeout = ${stress?10}${stress:300}
    smtpd_hard_error_limit = ${stress?1}${stress:20}

Translation: under conditions of stress, use an smtpd_timeout value of 10 seconds instead of 300, and use an smtpd_hard_error_limit of 1 instead of 20. The ${name?value} and ${name:value} syntax is explained in the postconf(5) manpage. With these settings, Postfix will quickly drop connections from clients that make errors, and will significantly reduce the time that it waits for a client command. This may hurt some legitimate deliveries, but it will still allow you to keep some mail flowing.
*** ./src/master/Makefile.in.orig Sat Mar 17 13:51:39 2007 --- ./src/master/Makefile.in Fri Oct 26 21:12:50 2007 *************** *** 130,135 **** --- 130,136 ---- master_ent.o: ../../include/inet_addr_host.h master_ent.o: ../../include/inet_addr_list.h master_ent.o: ../../include/iostuff.h + master_ent.o: ../../include/mail_conf.h master_ent.o: ../../include/mail_params.h master_ent.o: ../../include/mail_proto.h master_ent.o: ../../include/msg.h *************** *** 189,194 **** --- 190,196 ---- master_spawn.o: ../../include/argv.h master_spawn.o: ../../include/binhash.h master_spawn.o: ../../include/events.h + master_spawn.o: ../../include/mail_conf.h master_spawn.o: ../../include/msg.h master_spawn.o: ../../include/mymalloc.h master_spawn.o: ../../include/sys_defs.h *** ./src/master/master.h.orig Thu Dec 29 17:54:12 2005 --- ./src/master/master.h Fri Oct 26 21:12:50 2007 *************** *** 47,52 **** --- 47,54 ---- int max_proc; /* upper bound on # processes */ char *path; /* command pathname */ struct ARGV *args; /* argument vector */ + char *stress_param_val; /* stress value: "yes" or empty */ + time_t stress_expire_time; /* stress pulse stretcher */ int avail_proc; /* idle processes */ int total_proc; /* number of processes */ int throttle_delay; /* failure recovery parameter */ *** ./src/master/master_avail.c.orig Thu Jun 15 14:07:15 2006 --- ./src/master/master_avail.c Fri Oct 26 21:12:50 2007 *************** *** 26,31 **** --- 26,35 ---- /* available process, or this module causes a new process to be /* created to service the request. /* + /* When the service runs out of process slots, a warning is logged. + /* When the service is eligible for stress-mode operation, servers + /* are restarted and new servers are created with stress mode enabled. + /* /* master_avail_listen() ensures that someone monitors the service's /* listen socket for connection requests (as long as resources /* to handle connection requests are available). 
This function may *************** *** 76,81 **** --- 80,86 ---- static void master_avail_event(int event, char *context) { MASTER_SERV *serv = (MASTER_SERV *) context; + time_t now; int n; if (event == 0) /* XXX Can this happen? */ *************** *** 84,89 **** --- 89,118 ---- for (n = 0; n < serv->listen_fd_count; n++) event_disable_readwrite(serv->listen_fd[n]); } else { + + /* + * When all servers for a public internet service are busy, we start + * creating server processes with "-o stress=yes" on the command + * line, and keep creating such processes until the process count is + * below the limit for at least 1000 seconds. This provides a minimal + * solution that can be adopted into legacy and stable Postfix + * releases. + * + * This is not the right place to update serv->stress_param_val in + * response to stress level changes. Doing so would contaminate + * the "postfix reload" code with stress management implementation + * details, creating a source of future bugs. Instead, we update + * simple counters or flags here, and use their values to determine + * the proper serv->stress_param_val value when exec-ing a server + * process. 
+ */ + if (serv->stress_param_val != 0 + && !MASTER_LIMIT_OK(serv->max_proc, serv->total_proc + 1)) { + now = event_time(); + if (serv->stress_expire_time < now) + master_restart_service(serv); + serv->stress_expire_time = now + 1000; + } master_spawn(serv); } } *************** *** 120,125 **** --- 149,157 ---- serv->ext_name, serv->name, serv->max_proc); msg_warn("to avoid this condition, increase the process count " "in master.cf or reduce the service time per client"); + if (serv->stress_param_val) + msg_warn("see http://www.postfix.org/STRESS_README.html for " + "examples of stress-adapting configuration settings"); } } } *** ./src/master/master_conf.c.orig Thu Dec 29 17:54:12 2005 --- ./src/master/master_conf.c Fri Oct 26 21:12:50 2007 *************** *** 134,139 **** --- 134,140 ---- SWAP(char *, serv->ext_name, entry->ext_name); SWAP(char *, serv->path, entry->path); SWAP(ARGV *, serv->args, entry->args); + SWAP(char *, serv->stress_param_val, entry->stress_param_val); master_restart_service(serv); free_master_ent(entry); } *** ./src/master/master_ent.c.orig Thu Jun 15 14:07:15 2006 --- ./src/master/master_ent.c Fri Oct 26 21:12:50 2007 *************** *** 94,99 **** --- 94,100 ---- #include #include #include + #include <mail_conf.h> /* Local stuff. 
*/ *************** *** 511,516 **** --- 512,525 ---- argv_add(serv->args, "-u", (char *) 0); if (chroot) argv_add(serv->args, "-c", (char *) 0); + if ((serv->flags & MASTER_FLAG_LOCAL_ONLY) == 0) { + argv_add(serv->args, "-o", "stress=" CONFIG_BOOL_YES, (char *) 0); + serv->stress_param_val = + serv->args->argv[serv->args->argc - 1] + sizeof("stress=") - 1; + serv->stress_param_val[0] = 0; + } else + serv->stress_param_val = 0; + serv->stress_expire_time = 0; if (serv->listen_fd_count > 1) argv_add(serv->args, "-s", vstring_str(vstring_sprintf(junk, "%d", serv->listen_fd_count)), *** ./src/master/master_spawn.c.orig Thu Jun 15 14:07:15 2006 --- ./src/master/master_spawn.c Fri Oct 26 21:12:50 2007 *************** *** 70,75 **** --- 70,79 ---- #include #include + /* Global library. */ + + #include <mail_conf.h> + /* Application-specific. */ #include "master_proto.h" *************** *** 177,182 **** --- 181,190 ---- * connection and run the requested command. Leave child stderr * alone. Disable exit handlers: they should be executed by the * parent only. + * + * When we reach the process limit on a public internet service, we + * create stress-mode processes until the process count stays below + * the limit for some amount of time. See master_avail_listen(). */ case 0: msg_cleanup((void (*) (void)) 0); /* disable exit handler */ *************** *** 216,221 **** --- 224,231 ---- vstring_sprintf(env_gen, "%s=%o", MASTER_GEN_NAME, master_generation); if (putenv(vstring_str(env_gen)) < 0) msg_fatal("%s: putenv: %m", myname); + if (serv->stress_param_val && serv->stress_expire_time > event_time()) + serv->stress_param_val[0] = CONFIG_BOOL_YES[0]; execvp(serv->path, serv->args->argv); msg_fatal("%s: exec %s: %m", myname, serv->path);