Index: src/comm_poll.c
===================================================================
RCS file: src/comm_poll.c
diff -N src/comm_poll.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/comm_poll.c	24 May 2006 14:31:51 -0000
@@ -0,0 +1,689 @@
+
+/*
+ * $Id: comm_select.c,v 1.66 2006/05/22 19:01:32 serassio Exp $
+ *
+ * DEBUG: section 5     Socket Functions
+ *
+ * SQUID Web Proxy Cache          http://www.squid-cache.org/
+ * ----------------------------------------------------------
+ *
+ *  Squid is the result of efforts by numerous individuals from
+ *  the Internet community; see the CONTRIBUTORS file for full
+ *  details.   Many organizations have provided support for Squid's
+ *  development; see the SPONSORS file for full details.  Squid is
+ *  Copyrighted (C) 2001 by the Regents of the University of
+ *  California; see the COPYRIGHT file for full details.  Squid
+ *  incorporates software developed and/or copyrighted by other
+ *  sources; see the CREDITS file for full details.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "squid.h"
+
+#if USE_POLL
+
+static int MAX_POLL_TIME = 1000;	/* see also comm_quick_poll_required() */
+
+
+#ifndef        howmany
+#define howmany(x, y)   (((x)+((y)-1))/(y))
+#endif
+#ifndef        NBBY
+#define        NBBY    8
+#endif
+#define FD_MASK_BYTES sizeof(fd_mask)
+#define FD_MASK_BITS (FD_MASK_BYTES*NBBY)
+
+/* STATIC */
+static int fdIsHttp(int fd);
+static int fdIsIcp(int fd);
+static int fdIsDns(int fd);
+static int commDeferRead(int fd);
+static void checkTimeouts(void);
+static OBJH commIncomingStats;
+static int comm_check_incoming_poll_handlers(int nfds, int *fds);
+static void comm_poll_dns_incoming(void);
+
+static fd_set global_readfds;
+static fd_set global_writefds;
+static int nreadfds;
+static int nwritefds;
+
+/*
+ * Automatic tuning for incoming requests:
+ *
+ * INCOMING sockets are the ICP and HTTP ports.  We need to check these
+ * fairly regularly, but how often?  When the load increases, we
+ * want to check the incoming sockets more often.  If we have a lot
+ * of incoming ICP, then we need to check these sockets more than
+ * if we just have HTTP.
+ *
+ * The variables 'incoming_icp_interval' and 'incoming_http_interval' 
+ * determine how many normal I/O events to process before checking
+ * incoming sockets again.  Note we store the incoming_interval
+ * multipled by a factor of (2^INCOMING_FACTOR) to have some
+ * pseudo-floating point precision.
+ *
+ * The variable 'icp_io_events' and 'http_io_events' counts how many normal
+ * I/O events have been processed since the last check on the incoming
+ * sockets.  When io_events > incoming_interval, its time to check incoming
+ * sockets.
+ *
+ * Every time we check incoming sockets, we count how many new messages
+ * or connections were processed.  This is used to adjust the
+ * incoming_interval for the next iteration.  The new incoming_interval
+ * is calculated as the current incoming_interval plus what we would
+ * like to see as an average number of events minus the number of
+ * events just processed.
+ *
+ *  incoming_interval = incoming_interval + target_average - number_of_events_processed
+ *
+ * There are separate incoming_interval counters for both HTTP and ICP events
+ * 
+ * You can see the current values of the incoming_interval's, as well as
+ * a histogram of 'incoming_events' by asking the cache manager
+ * for 'comm_incoming', e.g.:
+ *
+ *      % ./client mgr:comm_incoming
+ *
+ * Caveats:
+ *
+ *      - We have MAX_INCOMING_INTEGER as a magic upper limit on
+ *        incoming_interval for both types of sockets.  At the
+ *        largest value the cache will effectively be idling.
+ *
+ *      - The higher the INCOMING_FACTOR, the slower the algorithm will
+ *        respond to load spikes/increases/decreases in demand. A value
+ *        between 3 and 8 is recommended.
+ */
+
+#define MAX_INCOMING_INTEGER 256
+#define INCOMING_FACTOR 5
+#define MAX_INCOMING_INTERVAL (MAX_INCOMING_INTEGER << INCOMING_FACTOR)
+static int icp_io_events = 0;
+static int dns_io_events = 0;
+static int http_io_events = 0;
+static int incoming_icp_interval = 16 << INCOMING_FACTOR;
+static int incoming_dns_interval = 16 << INCOMING_FACTOR;
+static int incoming_http_interval = 16 << INCOMING_FACTOR;
+#define commCheckICPIncoming (++icp_io_events > (incoming_icp_interval>> INCOMING_FACTOR))
+#define commCheckDNSIncoming (++dns_io_events > (incoming_dns_interval>> INCOMING_FACTOR))
+#define commCheckHTTPIncoming (++http_io_events > (incoming_http_interval>> INCOMING_FACTOR))
+
+static int
+fdIsIcp(int fd)
+{
+    if (fd == theInIcpConnection)
+	return 1;
+    if (fd == theOutIcpConnection)
+	return 1;
+    return 0;
+}
+
+static int
+fdIsDns(int fd)
+{
+    if (fd == DnsSocket)
+	return 1;
+    return 0;
+}
+
+static int
+fdIsHttp(int fd)
+{
+    int j;
+    for (j = 0; j < NHttpSockets; j++) {
+	if (fd == HttpSockets[j])
+	    return 1;
+    }
+    return 0;
+}
+
+#if DELAY_POOLS
+static int slowfdcnt = 0;
+static int slowfdarr[SQUID_MAXFD];
+
+static void
+commAddSlowFd(int fd)
+{
+    assert(slowfdcnt < SQUID_MAXFD);
+    slowfdarr[slowfdcnt++] = fd;
+}
+
+static int
+commGetSlowFd(void)
+{
+    int whichfd, retfd;
+
+    if (!slowfdcnt)
+	return -1;
+    whichfd = squid_random() % slowfdcnt;
+    retfd = slowfdarr[whichfd];
+    slowfdarr[whichfd] = slowfdarr[--slowfdcnt];
+    return retfd;
+}
+#endif
+
+static int
+comm_check_incoming_poll_handlers(int nfds, int *fds)
+{
+    int i;
+    int fd;
+    PF *hdl = NULL;
+    int npfds;
+    struct pollfd pfds[3 + MAXHTTPPORTS];
+    incoming_sockets_accepted = 0;
+    for (i = npfds = 0; i < nfds; i++) {
+	int events;
+	fd = fds[i];
+	events = 0;
+	if (fd_table[fd].read_handler)
+	    events |= POLLRDNORM;
+	if (fd_table[fd].write_handler)
+	    events |= POLLWRNORM;
+	if (events) {
+	    pfds[npfds].fd = fd;
+	    pfds[npfds].events = events;
+	    pfds[npfds].revents = 0;
+	    npfds++;
+	}
+    }
+    if (!nfds)
+	return -1;
+#if !ALARM_UPDATES_TIME
+    getCurrentTime();
+#endif
+    statCounter.syscalls.polls++;
+    if (poll(pfds, npfds, 0) < 1)
+	return incoming_sockets_accepted;
+    for (i = 0; i < npfds; i++) {
+	int revents;
+	if (((revents = pfds[i].revents) == 0) || ((fd = pfds[i].fd) == -1))
+	    continue;
+	if (revents & (POLLRDNORM | POLLIN | POLLHUP | POLLERR)) {
+	    if ((hdl = fd_table[fd].read_handler)) {
+		fd_table[fd].read_handler = NULL;
+		hdl(fd, fd_table[fd].read_data);
+	    } else if (pfds[i].events & POLLRDNORM)
+		debug(5, 1) ("comm_poll_incoming: FD %d NULL read handler\n",
+		    fd);
+	}
+	if (revents & (POLLWRNORM | POLLOUT | POLLHUP | POLLERR)) {
+	    if ((hdl = fd_table[fd].write_handler)) {
+		fd_table[fd].write_handler = NULL;
+		hdl(fd, fd_table[fd].write_data);
+	    } else if (pfds[i].events & POLLWRNORM)
+		debug(5, 1) ("comm_poll_incoming: FD %d NULL write_handler\n",
+		    fd);
+	}
+    }
+    return incoming_sockets_accepted;
+}
+
+static void
+comm_poll_icp_incoming(void)
+{
+    int nfds = 0;
+    int fds[2];
+    int nevents;
+    icp_io_events = 0;
+    if (theInIcpConnection >= 0)
+	fds[nfds++] = theInIcpConnection;
+    if (theInIcpConnection != theOutIcpConnection)
+	if (theOutIcpConnection >= 0)
+	    fds[nfds++] = theOutIcpConnection;
+    if (nfds == 0)
+	return;
+    nevents = comm_check_incoming_poll_handlers(nfds, fds);
+    incoming_icp_interval += Config.comm_incoming.icp_average - nevents;
+    if (incoming_icp_interval < Config.comm_incoming.icp_min_poll)
+	incoming_icp_interval = Config.comm_incoming.icp_min_poll;
+    if (incoming_icp_interval > MAX_INCOMING_INTERVAL)
+	incoming_icp_interval = MAX_INCOMING_INTERVAL;
+    if (nevents > INCOMING_ICP_MAX)
+	nevents = INCOMING_ICP_MAX;
+    statHistCount(&statCounter.comm_icp_incoming, nevents);
+}
+
+static void
+comm_poll_http_incoming(void)
+{
+    int nfds = 0;
+    int fds[MAXHTTPPORTS];
+    int j;
+    int nevents;
+    http_io_events = 0;
+    for (j = 0; j < NHttpSockets; j++) {
+	if (HttpSockets[j] < 0)
+	    continue;
+	if (commDeferRead(HttpSockets[j]))
+	    continue;
+	fds[nfds++] = HttpSockets[j];
+    }
+    nevents = comm_check_incoming_poll_handlers(nfds, fds);
+    incoming_http_interval = incoming_http_interval
+	+ Config.comm_incoming.http_average - nevents;
+    if (incoming_http_interval < Config.comm_incoming.http_min_poll)
+	incoming_http_interval = Config.comm_incoming.http_min_poll;
+    if (incoming_http_interval > MAX_INCOMING_INTERVAL)
+	incoming_http_interval = MAX_INCOMING_INTERVAL;
+    if (nevents > INCOMING_HTTP_MAX)
+	nevents = INCOMING_HTTP_MAX;
+    statHistCount(&statCounter.comm_http_incoming, nevents);
+}
+
+/* poll all sockets; call handlers for those that are ready. */
+int
+comm_poll(int msec)
+{
+    struct pollfd pfds[SQUID_MAXFD];
+#if DELAY_POOLS
+    fd_set slowfds;
+#endif
+    int fd;
+    unsigned int i;
+    unsigned int maxfd;
+    unsigned int nfds;
+    unsigned int npending;
+    int num;
+    int callicp = 0, callhttp = 0;
+    int calldns = 0;
+    static time_t last_timeout = 0;
+    double timeout = current_dtime + (msec / 1000.0);
+    do {
+#if !ALARM_UPDATES_TIME
+	double start;
+	getCurrentTime();
+	start = current_dtime;
+#endif
+	/* Handle any fs callbacks that need doing */
+	storeDirCallback();
+#if DELAY_POOLS
+	FD_ZERO(&slowfds);
+#endif
+	if (commCheckICPIncoming)
+	    comm_poll_icp_incoming();
+	if (commCheckDNSIncoming)
+	    comm_poll_dns_incoming();
+	if (commCheckHTTPIncoming)
+	    comm_poll_http_incoming();
+	callicp = calldns = callhttp = 0;
+	nfds = 0;
+	npending = 0;
+	maxfd = Biggest_FD + 1;
+	for (i = 0; i < maxfd; i++) {
+	    int events;
+	    events = 0;
+	    /* Check each open socket for a handler. */
+	    if (fd_table[i].read_handler) {
+		int dopoll = 1;
+		switch (commDeferRead(i)) {
+		case 0:
+		    break;
+		case 1:
+		    dopoll = 0;
+		    break;
+#if DELAY_POOLS
+		case -1:
+		    FD_SET(i, &slowfds);
+		    break;
+#endif
+		default:
+		    fatalf("bad return value from commDeferRead(FD %d)\n", i);
+		    /* NOTREACHED */
+		}
+		if (dopoll) {
+		    switch (fd_table[i].read_pending) {
+		    case COMM_PENDING_NORMAL:
+			events |= POLLRDNORM;
+			break;
+		    case COMM_PENDING_WANTS_WRITE:
+			events |= POLLWRNORM;
+			break;
+		    case COMM_PENDING_WANTS_READ:
+			events |= POLLRDNORM;
+			break;
+		    case COMM_PENDING_NOW:
+			events |= POLLRDNORM;
+			npending++;
+			break;
+		    }
+		}
+	    }
+	    if (fd_table[i].write_handler) {
+		switch (fd_table[i].write_pending) {
+		case COMM_PENDING_NORMAL:
+		    events |= POLLWRNORM;
+		    break;
+		case COMM_PENDING_WANTS_WRITE:
+		    events |= POLLWRNORM;
+		    break;
+		case COMM_PENDING_WANTS_READ:
+		    events |= POLLRDNORM;
+		    break;
+		case COMM_PENDING_NOW:
+		    events |= POLLWRNORM;
+		    npending++;
+		    break;
+		}
+	    }
+	    if (events) {
+		pfds[nfds].fd = i;
+		pfds[nfds].events = events;
+		pfds[nfds].revents = 0;
+		nfds++;
+	    }
+	}
+	if (nfds == 0) {
+	    assert(shutting_down);
+	    return COMM_SHUTDOWN;
+	}
+	if (npending)
+	    msec = 0;
+	if (msec > MAX_POLL_TIME)
+	    msec = MAX_POLL_TIME;
+	statCounter.syscalls.polls++;
+	num = poll(pfds, nfds, msec);
+	statCounter.select_loops++;
+	if (num < 0 && !ignoreErrno(errno)) {
+	    debug(5, 0) ("comm_poll: poll failure: %s\n", xstrerror());
+	    assert(errno != EINVAL);
+	    return COMM_ERROR;
+	    /* NOTREACHED */
+	}
+	debug(5, num ? 5 : 8) ("comm_poll: %d+%u FDs ready\n", num, npending);
+	statHistCount(&statCounter.select_fds_hist, num);
+	/* Check timeout handlers ONCE each second. */
+	if (squid_curtime > last_timeout) {
+	    last_timeout = squid_curtime;
+	    checkTimeouts();
+	}
+	if (num <= 0 && npending == 0)
+	    continue;
+	/* scan each socket but the accept socket. Poll this 
+	 * more frequently to minimize losses due to the 5 connect 
+	 * limit in SunOS */
+	for (i = 0; i < nfds; i++) {
+	    fde *F;
+	    int revents = pfds[i].revents;
+	    fd = pfds[i].fd;
+	    if (fd == -1)
+		continue;
+	    switch (fd_table[fd].read_pending) {
+	    case COMM_PENDING_NORMAL:
+	    case COMM_PENDING_WANTS_READ:
+		break;
+	    case COMM_PENDING_WANTS_WRITE:
+		if (pfds[i].revents & (POLLOUT | POLLWRNORM))
+		    revents |= POLLIN;
+		break;
+	    case COMM_PENDING_NOW:
+		revents |= POLLIN;
+		break;
+	    }
+	    switch (fd_table[fd].write_pending) {
+	    case COMM_PENDING_NORMAL:
+	    case COMM_PENDING_WANTS_WRITE:
+		break;
+	    case COMM_PENDING_WANTS_READ:
+		if (pfds[i].revents & (POLLIN | POLLRDNORM))
+		    revents |= POLLOUT;
+		break;
+	    case COMM_PENDING_NOW:
+		revents |= POLLOUT;
+		break;
+	    }
+	    if (revents == 0)
+		continue;
+	    if (fdIsIcp(fd)) {
+		callicp = 1;
+		continue;
+	    }
+	    if (fdIsDns(fd)) {
+		calldns = 1;
+		continue;
+	    }
+	    if (fdIsHttp(fd)) {
+		callhttp = 1;
+		continue;
+	    }
+	    F = &fd_table[fd];
+	    if (revents & (POLLRDNORM | POLLIN | POLLHUP | POLLERR)) {
+		PF *hdl = F->read_handler;
+		debug(5, 6) ("comm_poll: FD %d ready for reading\n", fd);
+		if (hdl == NULL)
+		    (void) 0;	/* Nothing to do */
+#if DELAY_POOLS
+		else if (FD_ISSET(fd, &slowfds))
+		    commAddSlowFd(fd);
+#endif
+		else {
+		    F->read_handler = NULL;
+		    F->read_pending = COMM_PENDING_NORMAL;
+		    hdl(fd, F->read_data);
+		    statCounter.select_fds++;
+		    if (commCheckICPIncoming)
+			comm_poll_icp_incoming();
+		    if (commCheckDNSIncoming)
+			comm_poll_dns_incoming();
+		    if (commCheckHTTPIncoming)
+			comm_poll_http_incoming();
+		}
+	    }
+	    if (revents & (POLLWRNORM | POLLOUT | POLLHUP | POLLERR)) {
+		PF *hdl = F->write_handler;
+		debug(5, 5) ("comm_poll: FD %d ready for writing\n", fd);
+		if (hdl != NULL) {
+		    F->write_handler = NULL;
+		    F->write_pending = COMM_PENDING_NORMAL;
+		    hdl(fd, F->write_data);
+		    statCounter.select_fds++;
+		    if (commCheckICPIncoming)
+			comm_poll_icp_incoming();
+		    if (commCheckDNSIncoming)
+			comm_poll_dns_incoming();
+		    if (commCheckHTTPIncoming)
+			comm_poll_http_incoming();
+		}
+	    }
+	    if (revents & POLLNVAL) {
+		close_handler *ch;
+		debug(5, 0) ("WARNING: FD %d has handlers, but it's invalid.\n", fd);
+		debug(5, 0) ("FD %d is a %s\n", fd, fdTypeStr[F->type]);
+		debug(5, 0) ("--> %s\n", F->desc);
+		debug(5, 0) ("tmout:%p read:%p write:%p\n",
+		    F->timeout_handler,
+		    F->read_handler,
+		    F->write_handler);
+		for (ch = F->close_handler; ch; ch = ch->next)
+		    debug(5, 0) (" close handler: %p\n", ch->handler);
+		if (F->close_handler) {
+		    commCallCloseHandlers(fd);
+		} else if (F->timeout_handler) {
+		    debug(5, 0) ("comm_poll: Calling Timeout Handler\n");
+		    F->timeout_handler(fd, F->timeout_data);
+		}
+		F->close_handler = NULL;
+		F->timeout_handler = NULL;
+		F->read_handler = NULL;
+		F->write_handler = NULL;
+		if (F->flags.open)
+		    fd_close(fd);
+	    }
+	}
+	if (callicp)
+	    comm_poll_icp_incoming();
+	if (calldns)
+	    comm_poll_dns_incoming();
+	if (callhttp)
+	    comm_poll_http_incoming();
+#if DELAY_POOLS
+	while ((fd = commGetSlowFd()) != -1) {
+	    fde *F = &fd_table[fd];
+	    PF *hdl = F->read_handler;
+	    debug(5, 6) ("comm_select: slow FD %d selected for reading\n", fd);
+	    if (hdl != NULL) {
+		F->read_handler = NULL;
+		F->read_pending = COMM_PENDING_NORMAL;
+		hdl(fd, F->read_data);
+		statCounter.select_fds++;
+		if (commCheckICPIncoming)
+		    comm_poll_icp_incoming();
+		if (commCheckDNSIncoming)
+		    comm_poll_dns_incoming();
+		if (commCheckHTTPIncoming)
+		    comm_poll_http_incoming();
+	    }
+	}
+#endif
+#if !ALARM_UPDATES_TIME
+	getCurrentTime();
+	statCounter.select_time += (current_dtime - start);
+#endif
+	return COMM_OK;
+    }
+    while (timeout > current_dtime);
+    debug(5, 8) ("comm_poll: time out: %ld.\n", (long int) squid_curtime);
+    return COMM_TIMEOUT;
+}
+
+
+static void
+comm_poll_dns_incoming(void)
+{
+    int nfds = 0;
+    int fds[2];
+    int nevents;
+    dns_io_events = 0;
+    if (DnsSocket < 0)
+	return;
+    fds[nfds++] = DnsSocket;
+    nevents = comm_check_incoming_poll_handlers(nfds, fds);
+    if (nevents < 0)
+	return;
+    incoming_dns_interval += Config.comm_incoming.dns_average - nevents;
+    if (incoming_dns_interval < Config.comm_incoming.dns_min_poll)
+	incoming_dns_interval = Config.comm_incoming.dns_min_poll;
+    if (incoming_dns_interval > MAX_INCOMING_INTERVAL)
+	incoming_dns_interval = MAX_INCOMING_INTERVAL;
+    if (nevents > INCOMING_DNS_MAX)
+	nevents = INCOMING_DNS_MAX;
+    statHistCount(&statCounter.comm_dns_incoming, nevents);
+}
+
+void
+comm_select_init(void)
+{
+    cachemgrRegister("comm_incoming",
+	"comm_incoming() stats",
+	commIncomingStats, 0, 1);
+    FD_ZERO(&global_readfds);
+    FD_ZERO(&global_writefds);
+    nreadfds = nwritefds = 0;
+}
+
+
+static void
+commIncomingStats(StoreEntry * sentry)
+{
+    StatCounters *f = &statCounter;
+    storeAppendPrintf(sentry, "Current incoming_icp_interval: %d\n",
+	incoming_icp_interval >> INCOMING_FACTOR);
+    storeAppendPrintf(sentry, "Current incoming_dns_interval: %d\n",
+	incoming_dns_interval >> INCOMING_FACTOR);
+    storeAppendPrintf(sentry, "Current incoming_http_interval: %d\n",
+	incoming_http_interval >> INCOMING_FACTOR);
+    storeAppendPrintf(sentry, "\n");
+    storeAppendPrintf(sentry, "Histogram of events per incoming socket type\n");
+    storeAppendPrintf(sentry, "ICP Messages handled per comm_poll_icp_incoming() call:\n");
+    statHistDump(&f->comm_icp_incoming, sentry, statHistIntDumper);
+    storeAppendPrintf(sentry, "DNS Messages handled per comm_poll_dns_incoming() call:\n");
+    statHistDump(&f->comm_dns_incoming, sentry, statHistIntDumper);
+    storeAppendPrintf(sentry, "HTTP Messages handled per comm_poll_http_incoming() call:\n");
+    statHistDump(&f->comm_http_incoming, sentry, statHistIntDumper);
+}
+
+void
+commUpdateReadBits(int fd, PF * handler)
+{
+    if (handler && !FD_ISSET(fd, &global_readfds)) {
+	FD_SET(fd, &global_readfds);
+	nreadfds++;
+    } else if (!handler && FD_ISSET(fd, &global_readfds)) {
+	FD_CLR(fd, &global_readfds);
+	nreadfds--;
+    }
+}
+
+void
+commUpdateWriteBits(int fd, PF * handler)
+{
+    if (handler && !FD_ISSET(fd, &global_writefds)) {
+	FD_SET(fd, &global_writefds);
+	nwritefds++;
+    } else if (!handler && FD_ISSET(fd, &global_writefds)) {
+	FD_CLR(fd, &global_writefds);
+	nwritefds--;
+    }
+}
+
+
+static int
+commDeferRead(int fd)
+{
+    fde *F = &fd_table[fd];
+    if (F->defer_check == NULL)
+	return 0;
+    return F->defer_check(fd, F->defer_data);
+}
+
+static void
+checkTimeouts(void)
+{
+    int fd;
+    fde *F = NULL;
+    PF *callback;
+    for (fd = 0; fd <= Biggest_FD; fd++) {
+	F = &fd_table[fd];
+	if (!F->flags.open)
+	    continue;
+	if (F->timeout == 0)
+	    continue;
+	if (F->timeout > squid_curtime)
+	    continue;
+	debug(5, 5) ("checkTimeouts: FD %d Expired\n", fd);
+	if (F->timeout_handler) {
+	    debug(5, 5) ("checkTimeouts: FD %d: Call timeout handler\n", fd);
+	    callback = F->timeout_handler;
+	    F->timeout_handler = NULL;
+	    callback(fd, F->timeout_data);
+	} else {
+	    debug(5, 5) ("checkTimeouts: FD %d: Forcing comm_close()\n", fd);
+	    comm_close(fd);
+	}
+    }
+}
+
+
+/* Called by async-io or diskd to speed up the polling */
+void
+comm_quick_poll_required(void)
+{
+    MAX_POLL_TIME = 10;
+}
+
+#endif
Index: src/comm_epoll.c
===================================================================
RCS file: src/comm_epoll.c
diff -N src/comm_epoll.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/comm_epoll.c	24 May 2006 14:31:51 -0000
@@ -0,0 +1,459 @@
+
+/*
+ * $Id: comm_select.c,v 1.66 2006/05/22 19:01:32 serassio Exp $
+ *
+ * DEBUG: section 5     Socket Functions
+ *
+ * SQUID Web Proxy Cache          http://www.squid-cache.org/
+ * ----------------------------------------------------------
+ *
+ *  Squid is the result of efforts by numerous individuals from
+ *  the Internet community; see the CONTRIBUTORS file for full
+ *  details.   Many organizations have provided support for Squid's
+ *  development; see the SPONSORS file for full details.  Squid is
+ *  Copyrighted (C) 2001 by the Regents of the University of
+ *  California; see the COPYRIGHT file for full details.  Squid
+ *  incorporates software developed and/or copyrighted by other
+ *  sources; see the CREDITS file for full details.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "squid.h"
+
+#if USE_EPOLL
+
+static int MAX_POLL_TIME = 1000;	/* see also comm_quick_poll_required() */
+
+/* epoll structs */
+static int kdpfd;
+static struct epoll_event *pevents;
+
+/* Array to keep track of backed off filedescriptors */
+static int backoff_fds[FD_SETSIZE];
+
+static void checkTimeouts(void);
+static int commDeferRead(int fd);
+
+static const char *
+epolltype_atoi(int x)
+{
+    switch (x) {
+
+    case EPOLL_CTL_ADD:
+	return "EPOLL_CTL_ADD";
+
+    case EPOLL_CTL_DEL:
+	return "EPOLL_CTL_DEL";
+
+    case EPOLL_CTL_MOD:
+	return "EPOLL_CTL_MOD";
+
+    default:
+	return "UNKNOWN_EPOLLCTL_OP";
+    }
+}
+
+/* Bring all fds back online */
+void
+commEpollBackon()
+{
+    fde *F;
+    int i;
+    int fd;
+    int j = 0;
+
+    for (i = 0; i < FD_SETSIZE; i++) {
+	if (backoff_fds[i]) {
+	    /* record the fd and zero the descriptor */
+	    fd = backoff_fds[i];
+	    F = &fd_table[fd];
+	    backoff_fds[i] = 0;
+
+	    /* If the fd is no longer backed off, ignore */
+	    if (!(F->epoll_backoff)) {
+		continue;
+	    }
+	    /* If the fd is still meant to be backed off, add it to the start of
+	     * the list and continue */
+	    if (commDeferRead(fd) == 1) {
+		backoff_fds[j++] = fd;
+		continue;
+	    }
+	    debug(5, 4) ("commEpollBackon: fd=%d\n", fd);
+
+	    /* Resume operations for this fd */
+	    commResumeFD(fd);
+	} else {
+	    /* Once we hit a non-backed off FD, we can break */
+	    break;
+	}
+    }
+}
+
+
+/* Back off on the next epoll for the given fd */
+void
+commEpollBackoff(int fd)
+{
+    commDeferFD(fd);
+}
+
+/* Defer reads from this fd */
+void
+commDeferFD(int fd)
+{
+    fde *F = &fd_table[fd];
+    struct epoll_event ev;
+    int epoll_ctl_type = 0;
+    int i;
+
+    /* Return if the fd is already backed off */
+    if (F->epoll_backoff) {
+	return;
+    }
+    for (i = 0; i < FD_SETSIZE; i++) {
+	if (!(backoff_fds[i]) || (backoff_fds[i] == fd)) {
+	    break;
+	}
+    }
+
+    /* die if we have no fd (very unlikely), if the fd has no existing epoll 
+     * state, if we are given a bad fd, or if the fd is not open. */
+    assert(i < FD_SETSIZE);
+    assert(F->epoll_state);
+    assert(fd >= 0);
+    assert(F->flags.open);
+
+    /* set up ev struct */
+    ev.events = 0;
+    ev.data.fd = fd;
+
+    /* If we were only waiting for reads, delete the fd, otherwise remove the 
+     * read event */
+    if (F->epoll_state == (EPOLLIN | EPOLLHUP | EPOLLERR)) {
+	epoll_ctl_type = EPOLL_CTL_DEL;
+    } else {
+	epoll_ctl_type = EPOLL_CTL_MOD;
+	ev.events = (F->epoll_state - EPOLLIN);
+    }
+    debug(5, 5) ("commDeferFD: epoll_ctl_type=%s, fd=%d epoll_state=%d\n", epolltype_atoi(epoll_ctl_type), fd, F->epoll_state);
+
+    if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
+	/* If an error occurs, log it */
+	debug(5, 1) ("commDeferFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n", epolltype_atoi(epoll_ctl_type), fd, xstrerror());
+    } else {
+	backoff_fds[i] = fd;
+	F->epoll_backoff = 1;
+	F->epoll_state = ev.events;
+    }
+}
+
+/* Resume reading from the given fd */
+void
+commResumeFD(int fd)
+{
+    struct epoll_event ev;
+    int epoll_ctl_type = 0;
+    fde *F;
+
+    F = &fd_table[fd];
+
+    /* If the fd has been modified, do nothing and remove the flag */
+    if (!(F->read_handler) || !(F->epoll_backoff)) {
+	debug(5, 2) ("commResumeFD: fd=%d ignoring read_handler=%p, epoll_backoff=%d\n", fd, F->read_handler, F->epoll_backoff);
+	F->epoll_backoff = 0;
+	return;
+    }
+    /* we need to re-add the fd to the epoll list with EPOLLIN set */
+    ev.events = F->epoll_state | EPOLLIN | EPOLLHUP | EPOLLERR;
+    ev.data.fd = fd;
+
+    /* If epoll_state is not set, then this fd is only waiting for
+     * reads, and needs adding, otherwise we mod it to add  EPOLLIN */
+    if (!(F->epoll_state)) {
+	epoll_ctl_type = EPOLL_CTL_ADD;
+    } else {
+	epoll_ctl_type = EPOLL_CTL_MOD;
+    }
+    debug(5, 5) ("commResumeFD: epoll_ctl_type=%s, fd=%d\n", epolltype_atoi(epoll_ctl_type), fd);
+
+    /* Try and add the fd back into the epoll struct */
+    if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
+	/* If an error occurs, log */
+	debug(5, 1) ("commResumeFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n", epolltype_atoi(epoll_ctl_type), fd, xstrerror());
+    } else {
+	F->epoll_backoff = 0;
+	F->epoll_state = ev.events;
+    }
+}
+void
+comm_select_init()
+{
+    int i;
+    pevents = (struct epoll_event *) xmalloc(SQUID_MAXFD * sizeof(struct epoll_event));
+    if (!pevents) {
+	fatalf("comm_select_init: xmalloc() failed: %s\n", xstrerror());
+    }
+    kdpfd = epoll_create(SQUID_MAXFD);
+
+    if (kdpfd < 0) {
+	fatalf("comm_select_init: epoll_create(): %s\n", xstrerror());
+    }
+    for (i = 0; i < FD_SETSIZE; i++) {
+	backoff_fds[i] = 0;
+    }
+}
+
+void
+commUpdateReadBits(int fd, PF * handler)
+{
+    /* Not imlpemented */
+}
+
+void
+commUpdateWriteBits(int fd, PF * handler)
+{
+    /* Not imlpemented */
+}
+
+void
+commSetSelect(int fd, unsigned int type, PF * handler, void *client_data, time_t timeout)
+{
+    fde *F = &fd_table[fd];
+    int epoll_ctl_type = 0;
+    struct epoll_event ev;
+
+    assert(fd >= 0);
+    assert(F->flags.open);
+    debug(5, 8) ("commSetSelect(fd=%d,type=%u,handler=%p,client_data=%p,timeout=%ld)\n", fd, type, handler, client_data, timeout);
+
+    ev.events = 0;
+    ev.data.fd = fd;
+
+    if (type & COMM_SELECT_READ) {
+	/* Only add the epoll event if the fd is not backed off */
+	if (handler && !(F->epoll_backoff)) {
+	    ev.events |= EPOLLIN;
+	}
+	F->read_handler = handler;
+	F->read_data = client_data;
+
+	// Otherwise, use previously stored value if the fd is not backed off
+    } else if ((F->epoll_state & EPOLLIN) && (F->read_handler) && !(F->epoll_backoff)) {
+	ev.events |= EPOLLIN;
+    }
+    if (type & COMM_SELECT_WRITE) {
+	if (handler) {
+	    ev.events |= EPOLLOUT;
+	}
+	F->write_handler = handler;
+	F->write_data = client_data;
+
+	// Otherwise, use previously stored value
+    } else if ((F->epoll_state & EPOLLOUT) && (F->write_handler)) {
+	ev.events |= EPOLLOUT;
+    }
+    if (ev.events) {
+	ev.events |= EPOLLHUP | EPOLLERR;
+    }
+    /* If the type is 0, force adding the fd to the epoll set */
+    if (!(type)) {
+	F->epoll_state = 0;
+    }
+    if (ev.events != F->epoll_state) {
+	// If the struct is already in epoll MOD or DEL, else ADD
+	if (F->epoll_state) {
+	    epoll_ctl_type = ev.events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
+	} else {
+	    epoll_ctl_type = EPOLL_CTL_ADD;
+	}
+
+	/* Update the state */
+	F->epoll_state = ev.events;
+
+	if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
+	    debug(5, 1) ("commSetSelect: epoll_ctl(%s): failed on fd=%d: %s\n",
+		epolltype_atoi(epoll_ctl_type), fd, xstrerror());
+	}
+    }
+    if (timeout)
+	F->timeout = squid_curtime + timeout;
+}
+
+int
+comm_epoll(int msec)
+{
+    struct timespec ts;
+    static time_t last_timeout = 0;
+    int i;
+    int num;
+    int fd;
+    fde *F;
+    PF *hdl;
+    struct epoll_event *cevents;
+    double timeout = current_dtime + (msec / 1000.0);
+
+    if (msec > MAX_POLL_TIME)
+	msec = MAX_POLL_TIME;
+
+    debug(50, 3) ("comm_epoll: timeout %d\n", msec);
+
+    do {
+#if !ALARM_UPDATES_TIME
+	double start;
+	getCurrentTime();
+	start = current_dtime;
+#endif
+	ts.tv_sec = msec / 1000;
+	ts.tv_nsec = (msec % 1000) * 1000;
+
+	/* Check timeouts once per second */
+	if (last_timeout < squid_curtime) {
+	    last_timeout = squid_curtime;
+	    checkTimeouts();
+
+	    /* bring backed off connections back online */
+	    commEpollBackon();
+	}
+	/* Check for disk io callbacks */
+	storeDirCallback();
+
+	for (;;) {
+	    statCounter.syscalls.polls++;
+	    num = epoll_wait(kdpfd, pevents, SQUID_MAXFD, msec);
+	    statCounter.select_loops++;
+
+	    if (num >= 0)
+		break;
+
+	    if (ignoreErrno(errno))
+		break;
+
+	    debug(5, 0) ("comm_epoll: epoll failure: %s\n", xstrerror());
+
+	    return COMM_ERROR;
+	}
+
+	statHistCount(&statCounter.select_fds_hist, num);
+
+	if (num <= 0)
+	    continue;
+
+	for (i = 0, cevents = pevents; i < num; i++, cevents++) {
+	    fd = cevents->data.fd;
+	    F = &fd_table[fd];
+	    debug(5, 8) ("comm_epoll(): got fd=%d events=%x monitoring=%x F->read_handler=%p F->write_handler=%p\n"
+		,fd, cevents->events, F->epoll_state, F->read_handler, F->write_handler);
+	    if (cevents->events & (EPOLLIN | EPOLLHUP | EPOLLERR)) {
+		if ((hdl = F->read_handler) != NULL) {
+		    // If the descriptor is meant to be deferred, don't handle
+		    if (commDeferRead(fd) == 1) {
+			if (!(F->epoll_backoff)) {
+			    debug(5, 1) ("comm_epoll(): WARNING defer handler for fd=%d (desc=%s) does not call commDeferFD() - backing off manually\n", fd, F->desc);
+			    commEpollBackoff(fd);
+			}
+			goto WRITE_EVENT;
+		    }
+		    debug(5, 8) ("comm_epoll(): Calling read handler on fd=%d\n", fd);
+		    F->read_handler = NULL;
+		    hdl(fd, F->read_data);
+		    statCounter.select_fds++;
+		    if ((F->read_handler == NULL) && (F->flags.open)) {
+			commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
+		    }
+		} else if (cevents->events & EPOLLIN) {
+		    debug(5, 2) ("comm_epoll(): no read handler for fd=%d", fd);
+		    if (F->flags.open) {
+			commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
+		    }
+		}
+	    }
+	  WRITE_EVENT:
+	    if (cevents->events & (EPOLLOUT | EPOLLHUP | EPOLLERR)) {
+		if ((hdl = F->write_handler) != NULL) {
+		    debug(5, 8) ("comm_epoll(): Calling write handler on fd=%d\n", fd);
+		    F->write_handler = NULL;
+		    hdl(fd, F->write_data);
+		    statCounter.select_fds++;
+		    if ((F->write_handler == NULL) && (F->flags.open)) {
+			commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
+		    }
+		} else if (cevents->events & EPOLLOUT) {
+		    debug(5, 2) ("comm_epoll(): no write handler for fd=%d\n", fd);
+		    if (F->flags.open) {
+			commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
+		    }
+		}
+	    }
+	}
+#if !ALARM_UPDATES_TIME
+	getCurrentTime();
+	statCounter.select_time += (current_dtime - start);
+#endif
+	return COMM_OK;
+    }
+    while (timeout > current_dtime);
+
+    debug(5, 8) ("comm_epoll: time out: %ld.\n", (long int) squid_curtime);
+    return COMM_TIMEOUT;
+}
+
+static int
+commDeferRead(int fd)
+{
+    fde *F = &fd_table[fd];
+    if (F->defer_check == NULL)
+	return 0;
+    return F->defer_check(fd, F->defer_data);
+}
+
+static void
+checkTimeouts(void)
+{
+    int fd;
+    fde *F = NULL;
+    PF *callback;
+    for (fd = 0; fd <= Biggest_FD; fd++) {
+	F = &fd_table[fd];
+	if (!F->flags.open)
+	    continue;
+	if (F->timeout == 0)
+	    continue;
+	if (F->timeout > squid_curtime)
+	    continue;
+	debug(5, 5) ("checkTimeouts: FD %d Expired\n", fd);
+	if (F->timeout_handler) {
+	    debug(5, 5) ("checkTimeouts: FD %d: Call timeout handler\n", fd);
+	    callback = F->timeout_handler;
+	    F->timeout_handler = NULL;
+	    callback(fd, F->timeout_data);
+	} else {
+	    debug(5, 5) ("checkTimeouts: FD %d: Forcing comm_close()\n", fd);
+	    comm_close(fd);
+	}
+    }
+}
+
+
+/* Called by async-io or diskd to speed up the polling */
+void
+comm_quick_poll_required(void)
+{
+    MAX_POLL_TIME = 10;
+}
+
+#endif
Index: src/Makefile.am
===================================================================
RCS file: /server/cvs-server/squid/squid/src/Makefile.am,v
retrieving revision 1.44
diff -u -r1.44 Makefile.am
--- src/Makefile.am	23 May 2006 21:52:27 -0000	1.44
+++ src/Makefile.am	24 May 2006 14:31:51 -0000
@@ -115,6 +115,8 @@
 	client_side.c \
 	comm.c \
 	comm_select.c \
+	comm_poll.c \
+	comm_epoll.c \
 	debug.c \
 	defines.h \
 	$(DELAY_POOL_SOURCE) \
Index: src/client_side.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/client_side.c,v
retrieving revision 1.621
diff -u -r1.621 client_side.c
--- src/client_side.c	22 May 2006 22:16:37 -0000	1.621
+++ src/client_side.c	24 May 2006 14:31:52 -0000
@@ -3290,7 +3290,7 @@
     ConnStateData *conn = data;
     if (conn->body.size_left && !F->flags.socket_eof) {
 	if (conn->in.offset >= conn->in.size - 1) {
-#if HAVE_EPOLL
+#if USE_EPOLL
 	    /* The commResumeFD function is called in this file */
 	    conn->in.clientfd = fd;
 	    commDeferFD(fd);
@@ -3301,7 +3301,7 @@
 	}
     } else {
 	if (conn->defer.until > squid_curtime) {
-#if HAVE_EPOLL
+#if USE_EPOLL
 	    /* This is a second resolution timer, so commEpollBackon will 
 	     * handle the resume for this defer call */
 	    commDeferFD(fd);
@@ -3709,7 +3709,7 @@
 	conn->body.size_left -= size;
 	/* Move any remaining data */
 	conn->in.offset -= size;
-#if HAVE_EPOLL
+#if USE_EPOLL
 	/* Resume the fd if necessary */
 	if (conn->in.clientfd) {
 	    if (conn->in.offset < conn->in.size - 1) {
Index: src/comm.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/comm.c,v
retrieving revision 1.342
diff -u -r1.342 comm.c
--- src/comm.c	22 May 2006 18:55:23 -0000	1.342
+++ src/comm.c	24 May 2006 14:31:52 -0000
@@ -375,7 +375,7 @@
 	commSetTcpNoDelay(cs->fd);
 #endif
 
-#if HAVE_EPOLL
+#if USE_EPOLL
     // If we are using epoll(), we need to make sure that this fd will be polled
     commSetSelect(cs->fd, 0, NULL, NULL, 0);
 #endif
@@ -779,7 +779,7 @@
 }
 
 /* Epoll redefines this function in comm_select.c */
-#if !HAVE_EPOLL
+#if !USE_EPOLL
 void
 commSetSelect(int fd, unsigned int type, PF * handler, void *client_data, time_t timeout)
 {
Index: src/comm_select.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/comm_select.c,v
retrieving revision 1.66
diff -u -r1.66 comm_select.c
--- src/comm_select.c	22 May 2006 19:01:32 -0000	1.66
+++ src/comm_select.c	24 May 2006 14:31:52 -0000
@@ -34,9 +34,10 @@
 
 #include "squid.h"
 
+#if USE_SELECT
+
 static int MAX_POLL_TIME = 1000;	/* see also comm_quick_poll_required() */
 
-#if !HAVE_EPOLL
 
 #ifndef        howmany
 #define howmany(x, y)   (((x)+((y)-1))/(y))
@@ -765,7 +766,7 @@
 		default:
 		    fatalf("bad return value from commDeferRead(FD %d)\n", fd);
 		}
-		if (FD_ISSET(fd, &readfds) && fd_table[fd].flags.read_pending) {
+		if (FD_ISSET(fd, &readfds) && fd_table[fd].read_pending & COMM_PENDING_NORMAL) {
 		    FD_SET(fd, &pendingfds);
 		    pending++;
 		}
@@ -1126,381 +1127,6 @@
     }
 }
 
-#else /* HAVE_EPOLL */
-/* epoll structs */
-static int kdpfd;
-static struct epoll_event *pevents;
-
-/* Array to keep track of backed off filedescriptors */
-static int backoff_fds[FD_SETSIZE];
-
-static void checkTimeouts(void);
-static int commDeferRead(int fd);
-
-static const char *
-epolltype_atoi(int x)
-{
-    switch (x) {
-
-    case EPOLL_CTL_ADD:
-	return "EPOLL_CTL_ADD";
-
-    case EPOLL_CTL_DEL:
-	return "EPOLL_CTL_DEL";
-
-    case EPOLL_CTL_MOD:
-	return "EPOLL_CTL_MOD";
-
-    default:
-	return "UNKNOWN_EPOLLCTL_OP";
-    }
-}
-
-/* Bring all fds back online */
-void
-commEpollBackon()
-{
-    fde *F;
-    int i;
-    int fd;
-    int j = 0;
-
-    for (i = 0; i < FD_SETSIZE; i++) {
-	if (backoff_fds[i]) {
-	    /* record the fd and zero the descriptor */
-	    fd = backoff_fds[i];
-	    F = &fd_table[fd];
-	    backoff_fds[i] = 0;
-
-	    /* If the fd is no longer backed off, ignore */
-	    if (!(F->epoll_backoff)) {
-		continue;
-	    }
-	    /* If the fd is still meant to be backed off, add it to the start of
-	     * the list and continue */
-	    if (commDeferRead(fd) == 1) {
-		backoff_fds[j++] = fd;
-		continue;
-	    }
-	    debug(5, 4) ("commEpollBackon: fd=%d\n", fd);
-
-	    /* Resume operations for this fd */
-	    commResumeFD(fd);
-	} else {
-	    /* Once we hit a non-backed off FD, we can break */
-	    break;
-	}
-    }
-}
-
-
-/* Back off on the next epoll for the given fd */
-void
-commEpollBackoff(int fd)
-{
-    commDeferFD(fd);
-}
-
-/* Defer reads from this fd */
-void
-commDeferFD(int fd)
-{
-    fde *F = &fd_table[fd];
-    struct epoll_event ev;
-    int epoll_ctl_type = 0;
-    int i;
-
-    /* Return if the fd is already backed off */
-    if (F->epoll_backoff) {
-	return;
-    }
-    for (i = 0; i < FD_SETSIZE; i++) {
-	if (!(backoff_fds[i]) || (backoff_fds[i] == fd)) {
-	    break;
-	}
-    }
-
-    /* die if we have no fd (very unlikely), if the fd has no existing epoll 
-     * state, if we are given a bad fd, or if the fd is not open. */
-    assert(i < FD_SETSIZE);
-    assert(F->epoll_state);
-    assert(fd >= 0);
-    assert(F->flags.open);
-
-    /* set up ev struct */
-    ev.events = 0;
-    ev.data.fd = fd;
-
-    /* If we were only waiting for reads, delete the fd, otherwise remove the 
-     * read event */
-    if (F->epoll_state == (EPOLLIN | EPOLLHUP | EPOLLERR)) {
-	epoll_ctl_type = EPOLL_CTL_DEL;
-    } else {
-	epoll_ctl_type = EPOLL_CTL_MOD;
-	ev.events = (F->epoll_state - EPOLLIN);
-    }
-    debug(5, 5) ("commDeferFD: epoll_ctl_type=%s, fd=%d epoll_state=%d\n", epolltype_atoi(epoll_ctl_type), fd, F->epoll_state);
-
-    if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
-	/* If an error occurs, log it */
-	debug(5, 1) ("commDeferFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n", epolltype_atoi(epoll_ctl_type), fd, xstrerror());
-    } else {
-	backoff_fds[i] = fd;
-	F->epoll_backoff = 1;
-	F->epoll_state = ev.events;
-    }
-}
-
-/* Resume reading from the given fd */
-void
-commResumeFD(int fd)
-{
-    struct epoll_event ev;
-    int epoll_ctl_type = 0;
-    fde *F;
-
-    F = &fd_table[fd];
-
-    /* If the fd has been modified, do nothing and remove the flag */
-    if (!(F->read_handler) || !(F->epoll_backoff)) {
-	debug(5, 2) ("commResumeFD: fd=%d ignoring read_handler=%p, epoll_backoff=%d\n", fd, F->read_handler, F->epoll_backoff);
-	F->epoll_backoff = 0;
-	return;
-    }
-    /* we need to re-add the fd to the epoll list with EPOLLIN set */
-    ev.events = F->epoll_state | EPOLLIN | EPOLLHUP | EPOLLERR;
-    ev.data.fd = fd;
-
-    /* If epoll_state is not set, then this fd is only waiting for
-     * reads, and needs adding, otherwise we mod it to add  EPOLLIN */
-    if (!(F->epoll_state)) {
-	epoll_ctl_type = EPOLL_CTL_ADD;
-    } else {
-	epoll_ctl_type = EPOLL_CTL_MOD;
-    }
-    debug(5, 5) ("commResumeFD: epoll_ctl_type=%s, fd=%d\n", epolltype_atoi(epoll_ctl_type), fd);
-
-    /* Try and add the fd back into the epoll struct */
-    if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
-	/* If an error occurs, log */
-	debug(5, 1) ("commResumeFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n", epolltype_atoi(epoll_ctl_type), fd, xstrerror());
-    } else {
-	F->epoll_backoff = 0;
-	F->epoll_state = ev.events;
-    }
-}
-void
-comm_select_init()
-{
-    int i;
-    pevents = (struct epoll_event *) xmalloc(SQUID_MAXFD * sizeof(struct epoll_event));
-    if (!pevents) {
-	fatalf("comm_select_init: xmalloc() failed: %s\n", xstrerror());
-    }
-    kdpfd = epoll_create(SQUID_MAXFD);
-
-    if (kdpfd < 0) {
-	fatalf("comm_select_init: epoll_create(): %s\n", xstrerror());
-    }
-    for (i = 0; i < FD_SETSIZE; i++) {
-	backoff_fds[i] = 0;
-    }
-}
-
-void
-commUpdateReadBits(int fd, PF * handler)
-{
-    /* Not imlpemented */
-}
-
-void
-commUpdateWriteBits(int fd, PF * handler)
-{
-    /* Not imlpemented */
-}
-
-void
-commSetSelect(int fd, unsigned int type, PF * handler, void *client_data, time_t timeout)
-{
-    fde *F = &fd_table[fd];
-    int epoll_ctl_type = 0;
-    struct epoll_event ev;
-
-    assert(fd >= 0);
-    assert(F->flags.open);
-    debug(5, 8) ("commSetSelect(fd=%d,type=%u,handler=%p,client_data=%p,timeout=%ld)\n", fd, type, handler, client_data, timeout);
-
-    ev.events = 0;
-    ev.data.fd = fd;
-
-    if (type & COMM_SELECT_READ) {
-	/* Only add the epoll event if the fd is not backed off */
-	if (handler && !(F->epoll_backoff)) {
-	    ev.events |= EPOLLIN;
-	}
-	F->read_handler = handler;
-	F->read_data = client_data;
-
-	// Otherwise, use previously stored value if the fd is not backed off
-    } else if ((F->epoll_state & EPOLLIN) && (F->read_handler) && !(F->epoll_backoff)) {
-	ev.events |= EPOLLIN;
-    }
-    if (type & COMM_SELECT_WRITE) {
-	if (handler) {
-	    ev.events |= EPOLLOUT;
-	}
-	F->write_handler = handler;
-	F->write_data = client_data;
-
-	// Otherwise, use previously stored value
-    } else if ((F->epoll_state & EPOLLOUT) && (F->write_handler)) {
-	ev.events |= EPOLLOUT;
-    }
-    if (ev.events) {
-	ev.events |= EPOLLHUP | EPOLLERR;
-    }
-    /* If the type is 0, force adding the fd to the epoll set */
-    if (!(type)) {
-	F->epoll_state = 0;
-    }
-    if (ev.events != F->epoll_state) {
-	// If the struct is already in epoll MOD or DEL, else ADD
-	if (F->epoll_state) {
-	    epoll_ctl_type = ev.events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
-	} else {
-	    epoll_ctl_type = EPOLL_CTL_ADD;
-	}
-
-	/* Update the state */
-	F->epoll_state = ev.events;
-
-	if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
-	    debug(5, 1) ("commSetSelect: epoll_ctl(%s): failed on fd=%d: %s\n",
-		epolltype_atoi(epoll_ctl_type), fd, xstrerror());
-	}
-    }
-    if (timeout)
-	F->timeout = squid_curtime + timeout;
-}
-
-int
-comm_epoll(int msec)
-{
-    struct timespec ts;
-    static time_t last_timeout = 0;
-    int i;
-    int num;
-    int fd;
-    fde *F;
-    PF *hdl;
-    struct epoll_event *cevents;
-    double timeout = current_dtime + (msec / 1000.0);
-
-    if (msec > MAX_POLL_TIME)
-	msec = MAX_POLL_TIME;
-
-    debug(50, 3) ("comm_epoll: timeout %d\n", msec);
-
-    do {
-#if !ALARM_UPDATES_TIME
-	double start;
-	getCurrentTime();
-	start = current_dtime;
-#endif
-	ts.tv_sec = msec / 1000;
-	ts.tv_nsec = (msec % 1000) * 1000;
-
-	/* Check timeouts once per second */
-	if (last_timeout < squid_curtime) {
-	    last_timeout = squid_curtime;
-	    checkTimeouts();
-
-	    /* bring backed off connections back online */
-	    commEpollBackon();
-	}
-	/* Check for disk io callbacks */
-	storeDirCallback();
-
-	for (;;) {
-	    statCounter.syscalls.polls++;
-	    num = epoll_wait(kdpfd, pevents, SQUID_MAXFD, msec);
-	    statCounter.select_loops++;
-
-	    if (num >= 0)
-		break;
-
-	    if (ignoreErrno(errno))
-		break;
-
-	    debug(5, 0) ("comm_epoll: epoll failure: %s\n", xstrerror());
-
-	    return COMM_ERROR;
-	}
-
-	statHistCount(&statCounter.select_fds_hist, num);
-
-	if (num <= 0)
-	    continue;
-
-	for (i = 0, cevents = pevents; i < num; i++, cevents++) {
-	    fd = cevents->data.fd;
-	    F = &fd_table[fd];
-	    debug(5, 8) ("comm_epoll(): got fd=%d events=%x monitoring=%x F->read_handler=%p F->write_handler=%p\n"
-		,fd, cevents->events, F->epoll_state, F->read_handler, F->write_handler);
-	    if (cevents->events & (EPOLLIN | EPOLLHUP | EPOLLERR)) {
-		if ((hdl = F->read_handler) != NULL) {
-		    // If the descriptor is meant to be deferred, don't handle
-		    if (commDeferRead(fd) == 1) {
-			if (!(F->epoll_backoff)) {
-			    debug(5, 1) ("comm_epoll(): WARNING defer handler for fd=%d (desc=%s) does not call commDeferFD() - backing off manually\n", fd, F->desc);
-			    commEpollBackoff(fd);
-			}
-			goto WRITE_EVENT;
-		    }
-		    debug(5, 8) ("comm_epoll(): Calling read handler on fd=%d\n", fd);
-		    F->read_handler = NULL;
-		    hdl(fd, F->read_data);
-		    statCounter.select_fds++;
-		    if ((F->read_handler == NULL) && (F->flags.open)) {
-			commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
-		    }
-		} else if (cevents->events & EPOLLIN) {
-		    debug(5, 2) ("comm_epoll(): no read handler for fd=%d", fd);
-		    if (F->flags.open) {
-			commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
-		    }
-		}
-	    }
-	  WRITE_EVENT:
-	    if (cevents->events & (EPOLLOUT | EPOLLHUP | EPOLLERR)) {
-		if ((hdl = F->write_handler) != NULL) {
-		    debug(5, 8) ("comm_epoll(): Calling write handler on fd=%d\n", fd);
-		    F->write_handler = NULL;
-		    hdl(fd, F->write_data);
-		    statCounter.select_fds++;
-		    if ((F->write_handler == NULL) && (F->flags.open)) {
-			commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
-		    }
-		} else if (cevents->events & EPOLLOUT) {
-		    debug(5, 2) ("comm_epoll(): no write handler for fd=%d\n", fd);
-		    if (F->flags.open) {
-			commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
-		    }
-		}
-	    }
-	}
-#if !ALARM_UPDATES_TIME
-	getCurrentTime();
-	statCounter.select_time += (current_dtime - start);
-#endif
-	return COMM_OK;
-    }
-    while (timeout > current_dtime);
-
-    debug(5, 8) ("comm_epoll: time out: %ld.\n", (long int) squid_curtime);
-    return COMM_TIMEOUT;
-}
-#endif /* HAVE_EPOLL  */
 
 static int
 commDeferRead(int fd)
@@ -1545,3 +1171,5 @@
 {
     MAX_POLL_TIME = 10;
 }
+
+#endif
Index: src/fd.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/fd.c,v
retrieving revision 1.46
diff -u -r1.46 fd.c
--- src/fd.c	18 May 2006 04:03:23 -0000	1.46
+++ src/fd.c	24 May 2006 14:31:52 -0000
@@ -84,7 +84,7 @@
 	assert(F->write_handler == NULL);
     }
     debug(51, 3) ("fd_close FD %d %s\n", fd, F->desc);
-#if HAVE_EPOLL
+#if USE_EPOLL
     /* the epoll code needs to update the descriptor before flags.ope is 0 */
     commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
     commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
Index: src/forward.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/forward.c,v
retrieving revision 1.99
diff -u -r1.99 forward.c
--- src/forward.c	24 May 2006 11:37:30 -0000	1.99
+++ src/forward.c	24 May 2006 14:31:52 -0000
@@ -873,7 +873,7 @@
     else {
 	int i = delayMostBytesWanted(mem, INT_MAX);
 	if (0 == i) {
-#if HAVE_EPOLL
+#if USE_EPOLL
 	    if (fd >= 0) {
 		mem->serverfd = fd;
 		commDeferFD(fd);
@@ -899,7 +899,7 @@
 	 * few other corner cases.
 	 */
 	if (fd >= 0 && mem->inmem_hi - mem->inmem_lo > SM_PAGE_SIZE + Config.Store.maxInMemObjSize + READ_AHEAD_GAP) {
-#if HAVE_EPOLL
+#if USE_EPOLL
 	    EBIT_SET(e->flags, ENTRY_DEFER_READ);
 	    mem->serverfd = fd;
 	    commDeferFD(fd);
@@ -909,7 +909,7 @@
     }
     if (fd >= 0 && mem->inmem_hi - storeLowestMemReaderOffset(e) > READ_AHEAD_GAP) {
 	EBIT_SET(e->flags, ENTRY_DEFER_READ);
-#if HAVE_EPOLL
+#if USE_EPOLL
 	mem->serverfd = fd;
 	commDeferFD(fd);
 #endif
Index: src/main.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/main.c,v
retrieving revision 1.366
diff -u -r1.366 main.c
--- src/main.c	22 May 2006 21:54:56 -0000	1.366
+++ src/main.c	24 May 2006 14:31:52 -0000
@@ -776,12 +776,14 @@
 	eventRun();
 	if ((loop_delay = eventNextTime()) < 0)
 	    loop_delay = 0;
-#if HAVE_EPOLL
+#if USE_EPOLL
 	switch (comm_epoll(loop_delay)) {
-#elif HAVE_POLL
+#elif USE_POLL
 	    switch (comm_poll(loop_delay)) {
-#else
+#elif USE_SELECT
 	switch (comm_select(loop_delay)) {
+#elif
+#error Need poll, epoll or select defined!
 #endif
 	case COMM_OK:
 	    errcount = 0;	/* reset if successful */
Index: src/protos.h
===================================================================
RCS file: /server/cvs-server/squid/squid/src/protos.h,v
retrieving revision 1.469
diff -u -r1.469 protos.h
--- src/protos.h	22 May 2006 23:05:27 -0000	1.469
+++ src/protos.h	24 May 2006 14:31:52 -0000
@@ -188,12 +188,14 @@
  * comm_select.c
  */
 extern void comm_select_init(void);
-#if HAVE_EPOLL
+#if USE_EPOLL
 extern int comm_epoll(int);
-#elif HAVE_POLL
+#elif USE_POLL
 extern int comm_poll(int);
-#else
+#elif USE_SELECT
 extern int comm_select(int);
+#else
+#error USE_POLL, USE_EPOLL or USE_SELECT need to be defined!
 #endif
 extern void commUpdateReadBits(int, PF *);
 extern void commUpdateWriteBits(int, PF *);
Index: src/squid.h
===================================================================
RCS file: /server/cvs-server/squid/squid/src/squid.h,v
retrieving revision 1.234
diff -u -r1.234 squid.h
--- src/squid.h	22 May 2006 18:55:23 -0000	1.234
+++ src/squid.h	24 May 2006 14:31:52 -0000
@@ -250,15 +250,15 @@
  *  -- Oskar Pearson <oskar@is.co.za>
  *  -- Stewart Forster <slf@connect.com.au>
  */
-#if HAVE_POLL
+#if USE_POLL
 #if HAVE_POLL_H
 #include <poll.h>
 #else /* HAVE_POLL_H */
 #undef HAVE_POLL
 #endif /* HAVE_POLL_H */
-#endif /* HAVE_POLL */
+#endif /* USE_POLL */
 
-#if HAVE_EPOLL
+#if USE_EPOLL
 #include <sys/epoll.h>
 #endif
 
@@ -393,7 +393,7 @@
 #include "Stack.h"
 
 /* Needed for poll() on Linux at least */
-#if HAVE_POLL
+#if USE_POLL
 #ifndef POLLRDNORM
 #define POLLRDNORM POLLIN
 #endif
Index: src/ssl.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/ssl.c,v
retrieving revision 1.130
diff -u -r1.130 ssl.c
--- src/ssl.c	18 May 2006 04:03:24 -0000	1.130
+++ src/ssl.c	24 May 2006 14:31:52 -0000
@@ -139,7 +139,7 @@
     if (i == INT_MAX)
 	return 0;
     if (i == 0) {
-#if HAVE_EPOLL
+#if USE_EPOLL
 	commDeferFD(fd);
 #endif
 	return 1;
Index: src/stat.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/stat.c,v
retrieving revision 1.366
diff -u -r1.366 stat.c
--- src/stat.c	18 May 2006 04:03:24 -0000	1.366
+++ src/stat.c	24 May 2006 14:31:52 -0000
@@ -835,7 +835,7 @@
     storeAppendPrintf(sentry, "aborted_requests = %f/sec\n",
 	XAVG(aborted_requests));
 
-#if HAVE_POLL || HAVE_EPOLL
+#if USE_POLL || USE_EPOLL
     storeAppendPrintf(sentry, "syscalls.polls = %f/sec\n", XAVG(syscalls.polls));
 #else
     storeAppendPrintf(sentry, "syscalls.selects = %f/sec\n", XAVG(syscalls.selects));
Index: src/store_client.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/store_client.c,v
retrieving revision 1.118
diff -u -r1.118 store_client.c
--- src/store_client.c	20 May 2006 12:58:51 -0000	1.118
+++ src/store_client.c	24 May 2006 14:31:52 -0000
@@ -285,7 +285,7 @@
 	    debug(20, 3) ("storeClientCopy3: %s - clearing ENTRY_DEFER_READ\n", e->mem_obj->url);
 	    /* Clear the flag and re-poll the fd */
 	    EBIT_CLR(e->flags, ENTRY_DEFER_READ);
-#if HAVE_EPOLL
+#if USE_EPOLL
 	    if (mem->serverfd != 0) {
 		commResumeFD(mem->serverfd);
 		mem->serverfd = 0;
Index: src/store_swapout.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/store_swapout.c,v
retrieving revision 1.91
diff -u -r1.91 store_swapout.c
--- src/store_swapout.c	18 May 2006 12:48:51 -0000	1.91
+++ src/store_swapout.c	24 May 2006 14:31:52 -0000
@@ -152,7 +152,7 @@
 
 	    if (mem->inmem_hi - mem->inmem_lo <= READ_AHEAD_GAP) {
 		EBIT_CLR(e->flags, ENTRY_DEFER_READ);
-#if HAVE_EPOLL
+#if USE_EPOLL
 		if (mem->serverfd != 0) {
 		    commResumeFD(mem->serverfd);
 		    mem->serverfd = 0;
Index: src/structs.h
===================================================================
RCS file: /server/cvs-server/squid/squid/src/structs.h,v
retrieving revision 1.457
diff -u -r1.457 structs.h
--- src/structs.h	22 May 2006 22:24:09 -0000	1.457
+++ src/structs.h	24 May 2006 14:31:53 -0000
@@ -2011,7 +2011,7 @@
 	    int recvfroms;
 	    int sendtos;
 	} sock;
-#if HAVE_POLL || HAVE_EPOLL
+#if USE_POLL || USE_EPOLL
 	int polls;
 #else
 	int selects;
Index: configure.in
===================================================================
RCS file: /server/cvs-server/squid/squid/configure.in,v
retrieving revision 1.334
diff -u -r1.334 configure.in
--- configure.in	24 May 2006 11:20:11 -0000	1.334
+++ configure.in	24 May 2006 14:31:53 -0000
@@ -783,6 +783,24 @@
   esac
 ])
 
+dnl Enable select()
+AC_ARG_ENABLE(select,
+[  --enable-select         Enable select() support.
+  --disable-select        Disable select() support. ],
+
+[
+  case "$enableval" in
+  yes)
+    echo "Forcing select() to be enabled"
+    ac_cv_func_select='yes'
+    ;;
+  no)
+    echo "Forcing select() to be disabled"
+    ac_cv_func_select='no'
+    ;;
+  esac
+])
+
 dnl Enable epoll()
 AC_ARG_ENABLE(epoll,
 [  --enable-epoll          Enable epoll() instead of poll() or select().  
@@ -2082,6 +2100,31 @@
 	bswap_32 \
 )
 
+dnl Magic which checks whether we are forcing a type of comm loop we
+dnl are actually going to (ab)use
+
+dnl Actually do the define magic now
+dnl mostly ripped from squid-commloops, thanks to adrian and benno
+
+if test "$ac_cv_func_epoll" = "yes" ; then
+        SELECT_TYPE="epoll"
+        AC_DEFINE(USE_EPOLL,1,[Use epoll() for the IO loop])
+        AC_CHECK_LIB(epoll, epoll_create, [EPOLL_LIBS="-lepoll"])
+        AC_SUBST(EPOLL_LIBS)
+elif test "$ac_cv_func_poll" = "yes" ; then
+        SELECT_TYPE="poll"
+        AC_DEFINE(USE_POLL,1,[Use poll() for the IO loop])
+elif test "$ac_cv_func_select" = "yes" ; then
+        SELECT_TYPE="select"
+        AC_DEFINE(USE_SELECT,1,[Use select() for the IO loop])
+else
+        echo "Eep!  Can't find poll, epoll, or select!"
+        echo "I'll try select and hope for the best."
+        SELECT_TYPE="select"
+        AC_DEFINE(USE_SELECT,1)
+fi
+echo "Using ${SELECT_TYPE} for select loop."
+
 dnl Yay!  Another Linux brokenness.  Its not good enough
 dnl to know that setresuid() exists, because RedHat 5.0 declares
 dnl setresuid() but doesn't implement it.
Index: include/autoconf.h.in
===================================================================
RCS file: /server/cvs-server/squid/squid/include/autoconf.h.in,v
retrieving revision 1.140
diff -u -r1.140 autoconf.h.in
--- include/autoconf.h.in	23 May 2006 14:54:36 -0000	1.140
+++ include/autoconf.h.in	24 May 2006 14:31:53 -0000
@@ -753,6 +753,9 @@
    dnsserver processes instead. */
 #undef USE_DNSSERVERS
 
+/* Use epoll() for the IO loop */
+#undef USE_EPOLL
+
 /* Define if we should use GNU regex */
 #undef USE_GNUREGEX
 
@@ -778,10 +781,16 @@
    (USE_SSL) */
 #undef USE_OPENSSL
 
+/* Use poll() for the IO loop */
+#undef USE_POLL
+
 /* If you want to log Referer request header values, define this. By default,
    they are written to referer.log in the Squid log directory. */
 #undef USE_REFERER_LOG
 
+/* Use select() for the IO loop */
+#undef USE_SELECT
+
 /* Define this to include code for SSL encryption. */
 #undef USE_SSL
 

