Profiling, debugging, and some optimization/fixes.

=== modified file 'include/MemPoolMalloc.h'
--- include/MemPoolMalloc.h	2010-05-29 00:59:35 +0000
+++ include/MemPoolMalloc.h	2010-10-23 04:04:00 +0000
@@ -21,6 +21,7 @@
  */
 
 #include "MemPool.h"
+#include <list>
 
 /// \ingroup MemPoolsAPI
 class MemPoolMalloc : public MemImplementingAllocator
@@ -42,7 +43,7 @@
     virtual void *allocate();
     virtual void deallocate(void *, bool aggressive);
 private:
-    Stack<void *> freelist;
+    std::list<void *> freelist;
 };
 
 

=== modified file 'include/profiling.h'
--- include/profiling.h	2010-07-07 15:48:48 +0000
+++ include/profiling.h	2010-10-25 02:15:23 +0000
@@ -216,8 +216,10 @@
 
 #else /* USE_XPROF_STATS */
 
+#ifndef PROF_start
 #define PROF_start(ARGS) ((void)0)
 #define PROF_stop(ARGS) ((void)0)
+#endif
 
 #endif /* USE_XPROF_STATS */
 

=== modified file 'lib/MemPool.cc'
--- lib/MemPool.cc	2010-05-30 09:13:54 +0000
+++ lib/MemPool.cc	2010-10-23 00:46:45 +0000
@@ -247,6 +247,7 @@
 {
     assert(obj != NULL);
     (void) VALGRIND_CHECK_MEM_IS_ADDRESSABLE(obj, obj_size);
+assert(MemPools::GetInstance().mem_idle_limit != 0);
     deallocate(obj, MemPools::GetInstance().mem_idle_limit == 0);
     ++free_calls;
 }

=== modified file 'lib/MemPoolMalloc.cc'
--- lib/MemPoolMalloc.cc	2010-05-30 00:15:16 +0000
+++ lib/MemPoolMalloc.cc	2010-10-23 01:05:56 +0000
@@ -54,8 +54,11 @@
 void *
 MemPoolMalloc::allocate()
 {
-    void *obj = freelist.pop();
-    if (obj) {
+    void *obj = NULL;
+    if (!freelist.empty()) {
+        obj = freelist.back();
+        assert(obj);
+        freelist.pop_back();
         memMeterDec(meter.idle);
         saved_calls++;
     } else {
@@ -127,13 +130,15 @@
 bool
 MemPoolMalloc::idleTrigger(int shift) const
 {
-    return freelist.count >> (shift ? 8 : 0);
+    return freelist.size() >> (shift ? 8 : 0);
 }
 
 void
 MemPoolMalloc::clean(time_t maxage)
 {
-    while (void *obj = freelist.pop()) {
+    while (!freelist.empty()) {
+        void *obj = freelist.back();
+        freelist.pop_back();
         memMeterDec(meter.idle);
         memMeterDec(meter.alloc);
         xfree(obj);

=== modified file 'src/Debug.h'
--- src/Debug.h	2010-05-26 03:06:02 +0000
+++ src/Debug.h	2010-10-25 02:12:31 +0000
@@ -143,4 +143,16 @@
 /* Legacy debug function definitions */
 SQUIDCEXTERN void _db_print(const char *,...) PRINTF_FORMAT_ARG1;
 
+
+/* Live call chain progress profiling */
+extern bool ForceLiveProfiling; ///< turns all live profilers on
+extern int LiveProfilingDepth; ///< number of profilers in the call chain
+extern timeval LiveProfilingStart; ///< the start time of the call chain
+extern void ProfileProgress(const char * const aStartName, const int aStartLine);
+extern void LiveProfilingReport();
+extern unsigned int LivePorfilingEventCount;
+#define Here __FILE__, __LINE__
+
+#include "base/Profiling.h"
+
 #endif /* SQUID_DEBUG_H */

=== modified file 'src/EventLoop.cc'
--- src/EventLoop.cc	2010-05-26 03:06:02 +0000
+++ src/EventLoop.cc	2010-10-25 04:04:14 +0000
@@ -34,6 +34,7 @@
 
 #include "EventLoop.h"
 #include "base/AsyncCallQueue.h"
+#include "base/Profiling.h"
 
 EventLoop::EventLoop() : errcount(0), last_loop(false), timeService(NULL),
         primaryEngine(NULL)
@@ -42,13 +43,23 @@
 void
 EventLoop::checkEngine(AsyncEngine * engine, bool const primary)
 {
+    static u_int64_t visits = 0;
+    ForceLiveProfiling = !(++visits % 50000);
+    getCurrentTime();
+    LiveProfilingStart = current_time;
+    LiveProfilingDepth = 0;
+    LivePorfilingEventCount = 0;
+
     int requested_delay;
 
-    if (!primary)
+    if (!primary) {
+        ProfileProgress(Here);
         requested_delay = engine->checkEvents(0);
-    else
+     } else
         requested_delay = engine->checkEvents(loop_delay);
 
+     ProfileProgress(Here);
+
     if (requested_delay < 0)
         switch (requested_delay) {
 
@@ -71,6 +82,25 @@
         if (requested_delay < loop_delay)
             loop_delay = requested_delay;
     }
+
+
+    getCurrentTime();
+    const int spent = tvSubUsec(LiveProfilingStart, current_time);
+    if (ForceLiveProfiling) {
+        debugs(1, 1, "timed checkEvents(" << primary << "): " << spent);
+        //LiveProfilingReport();
+    }
+
+    static double totalTime = 0;
+    static u_int64_t totalCount = 0;
+    totalTime += spent;
+    totalCount += LivePorfilingEventCount;
+    if (totalCount > 5000000) {
+        debugs(1, 1, "main loop events: " << totalTime << " / " << totalCount << " = " << totalTime/totalCount);
+        totalTime = 0;
+        totalCount = 0;
+        ScopeInfo::ReportAll();
+    }
 }
 
 void

=== modified file 'src/HttpHeader.cc'
--- src/HttpHeader.cc	2010-10-14 04:16:15 +0000
+++ src/HttpHeader.cc	2010-10-24 22:20:26 +0000
@@ -394,6 +394,7 @@
 void
 HttpHeader::clean()
 {
+    ProfileProgress(Here);
     HttpHeaderPos pos = HttpHeaderInitPos;
     HttpHeaderEntry *e;
 
@@ -431,7 +432,9 @@
             delete e;
         }
     }
+    ProfileProgress(Here);
     entries.clean();
+    ProfileProgress(Here);
     httpHeaderMaskInit(&mask, 0);
     PROF_stop(HttpHeaderClean);
 }
@@ -826,7 +829,7 @@
 void
 HttpHeader::compact()
 {
-    entries.prune(NULL);
+    entries.prune();
 }
 
 /*
@@ -1838,3 +1841,48 @@
             refreshMask();
     }
 }
+
+
+
+HttpHeaderEntries::HttpHeaderEntries(): count(0)
+{
+}
+
+HttpHeaderEntries::~HttpHeaderEntries()
+{
+    assert(!count); // or we would leak entries
+    // TODO: we should own the entries and delete them when we are destructed
+}
+
+void
+HttpHeaderEntries::push_back(HttpHeaderEntry *e)
+{
+    assert(count < 1024); // XXX: hardcoded limit
+    items[count++] = e;
+}
+
+void
+HttpHeaderEntries::insert(HttpHeaderEntry *e)
+{
+    push_back(e); // XXX: old code inserted at position one (expensive!)
+}
+
+void
+HttpHeaderEntries::prune()
+{
+    size_t newCount = 0;
+    for (size_t i = 0; i < count; ++i) {
+        if (items[i]) {
+            if (newCount < i)
+                items[newCount] = items[i];
+            ++newCount;
+        }
+     }
+     count = newCount;
+}
+
+void
+HttpHeaderEntries::clean()
+{
+    count = 0;
+}

=== modified file 'src/HttpHeader.h'
--- src/HttpHeader.h	2010-08-25 00:14:25 +0000
+++ src/HttpHeader.h	2010-10-24 21:57:27 +0000
@@ -179,6 +179,22 @@
 /// \todo CLEANUP: Kill this.
 typedef struct _ETag ETag;
 
+class HttpHeaderEntry;
+
+class HttpHeaderEntries {
+public:
+    HttpHeaderEntries();
+    ~HttpHeaderEntries();
+
+    void push_back(HttpHeaderEntry *);
+    void insert(HttpHeaderEntry *);
+    void prune();
+    void clean();
+
+    size_t count;
+    HttpHeaderEntry *items[1024];
+};
+
 class HttpHeaderEntry
 {
 
@@ -257,7 +273,7 @@
     inline bool chunked() const; ///< whether message uses chunked Transfer-Encoding
 
     /* protected, do not use these, use interface functions instead */
-    Vector<HttpHeaderEntry *> entries;		/**< parsed fields in raw format */
+    HttpHeaderEntries entries;		/**< parsed fields in raw format */
     HttpHeaderMask mask;	/**< bit set <=> entry present */
     http_hdr_owner_type owner;	/**< request or reply */
     int len;			/**< length when packed, not counting terminating null-byte */

=== modified file 'src/MemObject.cc'
--- src/MemObject.cc	2009-08-04 02:31:27 +0000
+++ src/MemObject.cc	2010-10-24 16:33:01 +0000
@@ -137,6 +137,7 @@
 void
 MemObject::write ( StoreIOBuffer writeBuffer, STMCB *callback, void *callbackData)
 {
+    ProfileProgress(Here);
     PROF_start(MemObject_write);
     debugs(19, 6, "memWrite: offset " << writeBuffer.offset << " len " << writeBuffer.length);
 
@@ -149,7 +150,9 @@
     assert (data_hdr.endOffset() || writeBuffer.offset == 0);
 
     assert (data_hdr.write (writeBuffer));
+    ProfileProgress(Here);
     callback (callbackData, writeBuffer);
+    ProfileProgress(Here);
     PROF_stop(MemObject_write);
 }
 

=== modified file 'src/Server.cc'
--- src/Server.cc	2010-09-13 02:25:09 +0000
+++ src/Server.cc	2010-10-24 20:57:26 +0000
@@ -60,6 +60,7 @@
 #endif
         receivedWholeRequestBody(false)
 {
+    ProfileProgress(Here);
     fwd = theFwdState;
     entry = fwd->entry;
 

=== modified file 'src/base/AsyncCall.cc'
--- src/base/AsyncCall.cc	2009-04-09 22:31:13 +0000
+++ src/base/AsyncCall.cc	2010-10-24 01:52:02 +0000
@@ -28,10 +28,14 @@
 void
 AsyncCall::make()
 {
+    ProfileProgress(Here);
+
     debugs(debugSection, debugLevel, HERE << "make call " << name <<
            " [call"<< id << ']');
     if (canFire()) {
+    ProfileProgress(Here);
         fire();
+    ProfileProgress(Here);
         return;
     }
 

=== modified file 'src/base/AsyncCallQueue.cc'
--- src/base/AsyncCallQueue.cc	2009-04-09 22:31:13 +0000
+++ src/base/AsyncCallQueue.cc	2010-10-25 01:25:12 +0000
@@ -8,11 +8,12 @@
 
 #include "base/AsyncCallQueue.h"
 #include "base/AsyncCall.h"
+#include "SquidTime.h"
 
 AsyncCallQueue *AsyncCallQueue::TheInstance = 0;
 
 
-AsyncCallQueue::AsyncCallQueue(): theHead(NULL), theTail(NULL)
+AsyncCallQueue::AsyncCallQueue(): theHead(NULL), theTail(NULL), count(0)
 {
 }
 
@@ -27,6 +28,7 @@
     } else { // create queue from cratch
         theHead = theTail = call;
     }
+    ++count;
 }
 
 // Fire all scheduled calls; returns true if at least one call was fired.
@@ -48,10 +50,42 @@
     call->theNext = NULL;
     if (theTail == call)
         theTail = NULL;
+    --count;
+
+    static u_int64_t visits = 0;
+    ForceLiveProfiling = !(++visits % 50000);
+    getCurrentTime();
+    LiveProfilingStart = current_time;
+    LiveProfilingDepth = 0;
+    LivePorfilingEventCount = 0;
 
     debugs(call->debugSection, call->debugLevel, "entering " << *call);
+    ProfileProgress(Here);
     call->make();
+    ProfileProgress(Here);
     debugs(call->debugSection, call->debugLevel, "leaving " << *call);
+
+    getCurrentTime();
+    const int spent = tvSubUsec(LiveProfilingStart, current_time);
+    static double sum = 0;
+    sum += spent;
+    if (spent >= 500 || LiveProfilingDepth >= 100 || ForceLiveProfiling) {
+        debugs(call->debugSection, 1, "timed " << call->name << ": " << spent << ' ' << sum/visits << " depth: " << count);
+        LiveProfilingReport();
+    }
+
+    static double totalTime = 0;
+    static double totalCount = 0;
+    totalTime += spent;
+    totalCount += LivePorfilingEventCount;
+    if (totalCount > 10000000.0) {
+        debugs(1, 1, "async call events: " << totalTime << " / " << totalCount << " = " << totalTime/totalCount);
+        totalTime = 0;
+        totalCount = 0;
+    }
+
+    ForceLiveProfiling = false;
+    LiveProfilingDepth = 0;
 }
 
 AsyncCallQueue &

=== modified file 'src/base/AsyncCallQueue.h'
--- src/base/AsyncCallQueue.h	2009-04-09 22:31:13 +0000
+++ src/base/AsyncCallQueue.h	2010-10-23 22:39:36 +0000
@@ -33,6 +33,7 @@
 
     AsyncCall::Pointer theHead;
     AsyncCall::Pointer theTail;
+    int count;
 
     static AsyncCallQueue *TheInstance;
 };

=== added file 'src/base/Profiling.cc'
--- src/base/Profiling.cc	1970-01-01 00:00:00 +0000
+++ src/base/Profiling.cc	2010-10-25 04:06:24 +0000
@@ -0,0 +1,74 @@
+#include "base/Profiling.h"
+#include "Debug.h"
+#include <set>
+#include <algorithm>
+
+u_int64_t ScopeInfo::LastId = 0;
+
+static std::set<ScopeInfo*> Scopes;
+
+static u_int64_t MinReportVolume = 0;
+
+
+ScopeInfo::ScopeInfo(const char * const aName, const int aLine):
+                     totalTicks(0), totalVisits(0),
+                     fileName(aName), fileLine(aLine), id(++LastId)
+{
+    doRegister();
+}
+
+ScopeInfo::~ScopeInfo()
+{
+    unRegister();
+}
+
+void
+ScopeInfo::doRegister()
+{
+    Scopes.insert(this);
+}
+
+void
+ScopeInfo::unRegister()
+{
+    Scopes.erase(this);
+}
+
+void
+ScopeInfo::report()
+{
+    if (MinReportVolume && totalVisits >= MinReportVolume) {
+        debugs(1,1, fileName << ':' << fileLine << ": scope" << id << ": " <<
+               totalTicks << " / " << totalVisits << " = " <<
+               (static_cast<double>(totalTicks)/(totalVisits ? totalVisits : 1)));
+        totalVisits = 0;
+        totalTicks = 0;
+    }
+}
+
+static double TotalVisits = 0;
+static double TotalTicks = 0;
+
+static
+void ReportOne(ScopeInfo *scope)
+{
+    TotalTicks += scope->totalTicks;
+    TotalVisits += scope->totalVisits;
+    scope->report();
+}
+
+void
+ScopeInfo::ReportAll()
+{
+    TotalTicks = 0;
+    TotalVisits = 0;
+
+    debugs(1,1, "Total profiling scopes: " << Scopes.size());
+    MinReportVolume = 1;
+    std::for_each(Scopes.begin(), Scopes.end(), &ReportOne);
+    MinReportVolume = INT_MAX;
+
+    debugs(1,1, "All scopes: " <<
+           TotalTicks << " / " << TotalVisits << " = " <<
+           (static_cast<double>(TotalTicks)/(TotalVisits ? TotalVisits : 1)));
+}

=== added file 'src/base/Profiling.h'
--- src/base/Profiling.h	1970-01-01 00:00:00 +0000
+++ src/base/Profiling.h	2010-10-25 02:25:35 +0000
@@ -0,0 +1,112 @@
+#ifndef SQUID_BASE_PROFILING_H
+#define SQUID_BASE_PROFILING_H
+
+#include "config.h"
+
+/* start future ticking.h; XXX: move */
+
+#if !_SQUID_SOLARIS_
+typedef int64_t  hrtime_t;
+#endif
+
+#if defined(__GNUC__) && ( defined(__i386) || defined(__i386__) )
+static inline hrtime_t
+get_tick(void)
+{
+    hrtime_t regs;
+
+asm volatile ("rdtsc":"=A" (regs));
+    return regs;
+    /* We need return value, we rely on CC to optimise out needless subf calls */
+    /* Note that "rdtsc" is relatively slow OP and stalls the CPU pipes, so use it wisely */
+}
+
+#elif defined(__GNUC__) && ( defined(__x86_64) || defined(__x86_64__) )
+static inline hrtime_t
+get_tick(void)
+{
+    uint32_t lo, hi;
+    // Based on an example in Wikipedia
+    /* We cannot use "=A", since this would use %rax on x86_64 */
+asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
+    return (hrtime_t)hi << 32 | lo;
+}
+
+#elif defined(__GNUC__) && defined(__alpha)
+static inline hrtime_t
+get_tick(void)
+{
+    hrtime_t regs;
+
+asm volatile ("rpcc %0" : "=r" (regs));
+    return regs;
+}
+
+#elif defined(_M_IX86) && defined(_MSC_VER) /* x86 platform on Microsoft C Compiler ONLY */
+static __inline hrtime_t
+get_tick(void)
+{
+    hrtime_t regs;
+
+    __asm {
+        cpuid
+        rdtsc
+        mov eax,DWORD PTR regs[0]
+        mov edx,DWORD PTR regs[4]
+    }
+    return regs;
+}
+
+#else
+/* This CPU is unsupported. Short-circuit, no profiling here */
+#define get_tick() 0
+#undef USE_XPROF_STATS
+#define USE_XPROF_STATS 0
+#endif
+
+/* end future ticking.h */
+
+class ScopeInfo {
+public:
+    static void ReportAll();
+
+    ScopeInfo(const char * const aName, const int aLine);
+    ~ScopeInfo();
+
+    void doRegister();
+    void unRegister();
+    void report();
+
+    u_int64_t totalTicks;
+    u_int64_t totalVisits;
+
+    const char * const fileName;
+    const int fileLine;
+
+    const u_int64_t id;
+    static u_int64_t LastId;
+};
+
+class ScopeProfiler {
+public:
+    ScopeProfiler(ScopeInfo &aScope): scope(aScope), startTicks(get_tick()) {}
+    ~ScopeProfiler() {
+       scope.totalTicks += get_tick() - startTicks;
+       if (++scope.totalVisits > 100000)
+           scope.report();
+    }
+
+    ScopeInfo &scope;       
+    u_int64_t startTicks;
+};
+
+#define ProfileScope(name, line, uid) \
+    static ScopeInfo ScopeInfoName ## uid(name, line); \
+    ScopeProfiler profiler ## uid(ScopeInfoName ## uid);
+
+#undef PROF_start
+#undef PROF_stop
+#define PROF_start(type) ProfileScope(__FILE__, __LINE__, type)
+#define PROF_stop(type)
+
+#endif /* SQUID_BASE_PROFILING_H */

=== modified file 'src/client_side.cc'
--- src/client_side.cc	2010-09-22 23:04:40 +0000
+++ src/client_side.cc	2010-10-24 17:16:05 +0000
@@ -2040,6 +2040,8 @@
     StoreIOBuffer tempBuffer;
     int r;
 
+    ProfileProgress(Here);
+
     /* pre-set these values to make aborting simpler */
     *method_p = METHOD_NONE;
 
@@ -2136,7 +2138,9 @@
            HttpParserReqSz(hp));
 
     /* Ok, all headers are received */
+    ProfileProgress(Here);
     http = new ClientHttpRequest(conn);
+    ProfileProgress(Here);
 
     http->req_sz = HttpParserRequestLen(hp);
     result = ClientSocketContextNew(http);
@@ -2168,6 +2172,7 @@
         *t = '\0';
 
 #endif
+    ProfileProgress(Here);
 
     /* Rewrite the URL in transparent or accelerator mode */
     /* NP: there are several cases to traverse here:
@@ -2209,10 +2214,12 @@
         strcpy(http->uri, url);
     }
 
+    ProfileProgress(Here);
     setLogUri(http, http->uri);
     debugs(33, 5, "parseHttpRequest: Complete request received");
     result->flags.parsed_ok = 1;
     xfree(url);
+    ProfileProgress(Here);
     return result;
 }
 
@@ -2357,6 +2364,7 @@
 static void
 clientProcessRequest(ConnStateData *conn, HttpParser *hp, ClientSocketContext *context, const HttpRequestMethod& method, HttpVersion http_ver)
 {
+    ProfileProgress(Here);
     ClientHttpRequest *http = context->http;
     HttpRequest *request = NULL;
     bool notedUseOfBuffer = false;
@@ -2434,6 +2442,8 @@
         goto finish;
     }
 
+    ProfileProgress(Here);
+
     request->flags.accelerated = http->flags.accel;
     request->flags.ignore_cc = conn->port->ignore_cc;
     request->flags.no_direct = request->flags.accelerated ? !conn->port->allow_direct : 0;
@@ -2478,6 +2488,8 @@
     request->my_addr = conn->me;
     request->http_ver = http_ver;
 
+    ProfileProgress(Here);
+
     if (request->header.chunked()) {
         chunked = true;
     } else if (request->header.has(HDR_TRANSFER_ENCODING)) {
@@ -2538,6 +2550,8 @@
     if (http->request->method == METHOD_CONNECT)
         context->mayUseConnection(true);
 
+    ProfileProgress(Here);
+
     /* Do we expect a request-body? */
     expectBody = chunked || request->content_length > 0;
     if (!context->mayUseConnection() && expectBody) {
@@ -2573,11 +2587,13 @@
         context->mayUseConnection(!request->body_pipe->productionEnded());
     }
 
+    ProfileProgress(Here);
     http->calloutContext = new ClientRequestContext(http);
 
     http->doCallouts();
 
 finish:
+    ProfileProgress(Here);
     if (!notedUseOfBuffer)
         connNoteUseOfBuffer(conn, http->req_sz);
 
@@ -2594,6 +2610,7 @@
         comm_reset_close(conn->fd);
         return;
     }
+    ProfileProgress(Here);
 }
 
 static void
@@ -2636,6 +2653,7 @@
     HttpVersion http_ver;
     HttpParser hp;
 
+    ProfileProgress(Here);
     debugs(33, 5, "clientParseRequest: FD " << conn->fd << ": attempting to parse");
 
     // Loop while we have read bytes that are not needed for producing the body
@@ -2666,7 +2684,7 @@
         PROF_start(parseHttpRequest);
 
         context = parseHttpRequest(conn, &hp, &method, &http_ver);
-
+        ProfileProgress(Here);
         PROF_stop(parseHttpRequest);
 
         /* partial or incomplete request */
@@ -2683,7 +2701,9 @@
             commSetTimeout(conn->fd, Config.Timeout.lifetime, clientLifetimeTimeout,
                            context->http);
 
+            ProfileProgress(Here);
             clientProcessRequest(conn, &hp, context, method, http_ver);
+            ProfileProgress(Here);
 
             parsed_req = true;
 
@@ -2697,12 +2717,16 @@
 
     /* XXX where to 'finish' the parsing pass? */
 
+    ProfileProgress(Here);
+
     return parsed_req;
 }
 
 void
 ConnStateData::clientReadRequest(const CommIoCbParams &io)
 {
+    ProfileProgress(Here);
+
     debugs(33,5,HERE << "clientReadRequest FD " << io.fd << " size " << io.size);
     Must(reading());
     reader = NULL;
@@ -2778,9 +2802,11 @@
          * be if we have an incomplete request.
          * XXX: This duplicates ClientSocketContext::keepaliveNextRequest
          */
+        ProfileProgress(Here);
         if (getConcurrentRequestCount() == 0 && commIsHalfClosed(fd)) {
             debugs(33, 5, "clientReadRequest: FD " << fd << ": half-closed connection, no completed request parsed, connection closing.");
             comm_close(fd);
+            ProfileProgress(Here);
             return;
         }
     }
@@ -2788,7 +2814,9 @@
     if (!isOpen())
         return;
 
+    ProfileProgress(Here);
     clientAfterReadingRequests(do_next_read);
+    ProfileProgress(Here);
 }
 
 /**
@@ -2800,6 +2828,7 @@
 bool
 ConnStateData::handleReadData(char *buf, size_t size)
 {
+    ProfileProgress(Here);
     char *current_buf = in.addressToReadInto();
 
     if (buf != current_buf)
@@ -2809,9 +2838,12 @@
 
     in.buf[in.notYetUsed] = '\0'; /* Terminate the string */
 
+    ProfileProgress(Here);
+
     // if we are reading a body, stuff data into the body pipe
     if (bodyPipe != NULL)
         return handleRequestBodyData();
+    ProfileProgress(Here);
     return true;
 }
 
@@ -2825,6 +2857,7 @@
 bool
 ConnStateData::handleRequestBodyData()
 {
+    ProfileProgress(Here);
     assert(bodyPipe != NULL);
 
     size_t putSize = 0;
@@ -2846,6 +2879,7 @@
     if (putSize > 0)
         connNoteUseOfBuffer(this, putSize);
 
+    ProfileProgress(Here);
     if (!bodyPipe) {
         debugs(33,5, HERE << "produced entire request body for FD " << fd);
 
@@ -2853,11 +2887,14 @@
             /* we've finished reading like good clients,
              * now do the close that initiateClose initiated.
              */
+            ProfileProgress(Here);
             comm_close(fd);
+            ProfileProgress(Here);
             return false;
         }
     }
 
+    ProfileProgress(Here);
     return true;
 }
 

=== modified file 'src/client_side_reply.cc'
--- src/client_side_reply.cc	2010-09-28 17:43:42 +0000
+++ src/client_side_reply.cc	2010-10-24 20:39:21 +0000
@@ -616,6 +616,7 @@
 void
 clientReplyContext::processMiss()
 {
+    ProfileProgress(Here);
     char *url = http->uri;
     HttpRequest *r = http->request;
     ErrorState *err = NULL;
@@ -768,6 +769,7 @@
 void
 clientReplyContext::created(StoreEntry *newEntry)
 {
+    ProfileProgress(Here);
     if (lookingforstore == 1)
         purgeFoundGet(newEntry);
     else if (lookingforstore == 2)
@@ -1488,6 +1490,7 @@
 void
 clientReplyContext::identifyFoundObject(StoreEntry *newEntry)
 {
+    ProfileProgress(Here);
     StoreEntry *e = newEntry;
     HttpRequest *r = http->request;
 
@@ -1541,6 +1544,7 @@
 
 #endif
 
+    ProfileProgress(Here);
     if (NULL == http->storeEntry()) {
         /** \li If no StoreEntry object is current assume this object isn't in the cache set MISS*/
         debugs(85, 3, "clientProcessRequest2: StoreEntry is NULL -  MISS");
@@ -1607,6 +1611,7 @@
 void
 clientGetMoreData(clientStreamNode * aNode, ClientHttpRequest * http)
 {
+    ProfileProgress(Here);
     /* Test preconditions */
     assert(aNode != NULL);
     assert(cbdataReferenceValid(aNode));
@@ -1629,8 +1634,10 @@
         tempBuffer.length = next->readBuffer.length;
         tempBuffer.data = next->readBuffer.data;
 
+        ProfileProgress(Here);
         storeClientCopy(context->sc, http->storeEntry(),
                         tempBuffer, clientReplyContext::SendMoreData, context);
+        ProfileProgress(Here);
         return;
     }
 
@@ -1639,6 +1646,7 @@
         return;
     }
 
+    ProfileProgress(Here);
     // OPTIONS with Max-Forwards:0 handled in clientProcessRequest()
 
     if (context->http->request->method == METHOD_TRACE) {
@@ -1651,13 +1659,17 @@
         http->logType = LOG_TCP_MISS;
 
         context->doGetMoreData();
-    } else
+    } else {
+        ProfileProgress(Here);
         context->identifyStoreObject();
+	}
+    ProfileProgress(Here);
 }
 
 void
 clientReplyContext::doGetMoreData()
 {
+    ProfileProgress(Here);
     /* We still have to do store logic processing - vary, cache hit etc */
     if (http->storeEntry() != NULL) {
         /* someone found the object in the cache for us */
@@ -1702,6 +1714,7 @@
         /* MISS CASE, http->logType is already set! */
         processMiss();
     }
+    ProfileProgress(Here);
 }
 
 /** The next node has removed itself from the stream. */

=== modified file 'src/client_side_request.cc'
--- src/client_side_request.cc	2010-09-28 17:43:42 +0000
+++ src/client_side_request.cc	2010-10-24 17:27:51 +0000
@@ -1135,6 +1135,7 @@
 void
 ClientHttpRequest::httpStart()
 {
+    ProfileProgress(Here);
     PROF_start(httpStart);
     logType = LOG_TAG_NONE;
     debugs(85, 4, "ClientHttpRequest::httpStart: " << log_tags[logType] << " for '" << uri << "'");
@@ -1277,6 +1278,7 @@
 void
 ClientHttpRequest::doCallouts()
 {
+    ProfileProgress(Here);
     assert(calloutContext);
 
     /*Save the original request for logging purposes*/
@@ -1287,6 +1289,7 @@
         debugs(83, 3, HERE << "Doing calloutContext->clientAccessCheck()");
         calloutContext->http_access_done = true;
         calloutContext->clientAccessCheck();
+        ProfileProgress(Here);
         return;
     }
 
@@ -1296,7 +1299,10 @@
         if (Adaptation::AccessCheck::Start(
                     Adaptation::methodReqmod, Adaptation::pointPreCache,
                     request, NULL, adaptationAclCheckDoneWrapper, calloutContext))
+{
+            ProfileProgress(Here);
             return; // will call callback
+}
     }
 #endif
 
@@ -1308,6 +1314,7 @@
             debugs(83, 3, HERE << "Doing calloutContext->clientRedirectStart()");
             calloutContext->redirect_state = REDIRECT_PENDING;
             calloutContext->clientRedirectStart();
+            ProfileProgress(Here);
             return;
         }
     }
@@ -1316,6 +1323,7 @@
         debugs(83, 3, HERE << "Doing calloutContext->clientAccessCheck2()");
         calloutContext->adapted_http_access_done = true;
         calloutContext->clientAccessCheck2();
+        ProfileProgress(Here);
         return;
     }
 
@@ -1323,6 +1331,7 @@
         debugs(83, 3, HERE << "Doing clientInterpretRequestHeaders()");
         calloutContext->interpreted_req_hdrs = 1;
         clientInterpretRequestHeaders(this);
+        ProfileProgress(Here);
     }
 
     if (!calloutContext->no_cache_done) {
@@ -1331,6 +1340,7 @@
         if (Config.accessList.noCache && request->flags.cachable) {
             debugs(83, 3, HERE << "Doing calloutContext->checkNoCache()");
             calloutContext->checkNoCache();
+            ProfileProgress(Here);
             return;
         }
     }
@@ -1344,9 +1354,11 @@
             int tos = aclMapTOS(Config.accessList.clientside_tos, &ch);
             if (tos)
                 comm_set_tos(getConn()->fd, tos);
+            ProfileProgress(Here);
         }
     }
 
+    ProfileProgress(Here);
     cbdataReferenceDone(calloutContext->http);
     delete calloutContext;
     calloutContext = NULL;
@@ -1356,13 +1368,16 @@
 #endif
 
     debugs(83, 3, HERE << "calling processRequest()");
+    ProfileProgress(Here);
     processRequest();
+    ProfileProgress(Here);
 
 #if ICAP_CLIENT
     Adaptation::Icap::History::Pointer ih = request->icapHistory();
     if (ih != NULL)
         ih->logType = logType;
 #endif
+    ProfileProgress(Here);
 }
 
 #if !_USE_INLINE_

=== modified file 'src/comm_epoll.cc'
--- src/comm_epoll.cc	2010-10-14 04:16:15 +0000
+++ src/comm_epoll.cc	2010-10-24 22:56:01 +0000
@@ -280,6 +280,7 @@
 
     PROF_stop(comm_check_incoming);
     getCurrentTime();
+    LiveProfilingStart = current_time;
 
     statHistCount(&statCounter.select_fds_hist, num);
 
@@ -287,6 +288,7 @@
         return COMM_TIMEOUT;		/* No error.. */
 
     PROF_start(comm_handle_ready_fd);
+    ProfileProgress(Here);
 
     for (i = 0, cevents = pevents; i < num; i++, cevents++) {
         fd = cevents->data.fd;
@@ -297,6 +299,8 @@
 
         // TODO: add EPOLLPRI??
 
+        ProfileProgress(Here);
+
         if (cevents->events & (EPOLLIN|EPOLLHUP|EPOLLERR) || F->flags.read_pending) {
             if ((hdl = F->read_handler) != NULL) {
                 debugs(5, DEBUG_EPOLL ? 0 : 8, "comm_select(): Calling read handler on FD " << fd);
@@ -313,6 +317,8 @@
             }
         }
 
+        ProfileProgress(Here);
+
         if (cevents->events & (EPOLLOUT|EPOLLHUP|EPOLLERR)) {
             if ((hdl = F->write_handler) != NULL) {
                 debugs(5, DEBUG_EPOLL ? 0 : 8, "comm_select(): Calling write handler on FD " << fd);
@@ -329,6 +335,7 @@
         }
     }
 
+    ProfileProgress(Here);
     PROF_stop(comm_handle_ready_fd);
 
     return COMM_OK;

=== modified file 'src/debug.cc'
--- src/debug.cc	2010-07-06 23:09:44 +0000
+++ src/debug.cc	2010-10-25 02:12:46 +0000
@@ -805,3 +805,85 @@
 
     return path+BuildPrefixLength;
 }
+
+bool ForceLiveProfiling = false;
+int LiveProfilingDepth = 0;
+timeval LiveProfilingStart = {0, 0};
+static const int LiveProfilingDepthMax = 10240;
+unsigned int LivePorfilingEventCount = 0;
+
+class LiveProfilingHistoryStep {
+public:
+    const char *fileName;
+    int lineNo;
+    timeval stamp;
+};
+static LiveProfilingHistoryStep LiveProfilingHistory[LiveProfilingDepthMax];
+void ProfileProgress(const char * const fileName, const int lineNo)
+{
+    ++LivePorfilingEventCount;
+return;
+    getCurrentTime();
+
+    LiveProfilingHistoryStep &step =
+        LiveProfilingHistory[min(LiveProfilingDepth++, LiveProfilingDepthMax)];
+    step.fileName = fileName;
+    step.lineNo = lineNo;
+    step.stamp = current_time;
+}
+
+void LiveProfilingReport()
+{
+    timeval last = LiveProfilingStart;
+    int aggr = 0;
+    for (int i = 0; i < LiveProfilingDepth; ++i) {
+        LiveProfilingHistoryStep &step = LiveProfilingHistory[i];
+        const int spent = tvSubUsec(last, step.stamp);
+        aggr += spent;
+        debugs(1,1, step.fileName << ':' << step.lineNo << ": step" << i <<
+            ": " << spent << " / " << aggr);
+        last = step.stamp;
+    }
+}
+
+#if 0 /* XXX: remove old */
+void ProfileProgress(const char * const fileName, const int lineNo)
+{
+    static const char *LastFileName = "?";
+    static int LastLineNo = 0;
+    static int LastSpent = 0;
+
+    getCurrentTime();
+    static timeval Last = { 0,0 };
+    static int Aggr = 0;
+    if (!LiveProfilingDepth++) {
+        Last = LiveProfilingStart;
+        Aggr = 0;
+        LastFileName = 0;
+        LastLineNo = LastSpent = 0;
+    }
+    const int spent = tvSubUsec(Last, current_time);
+    Aggr += spent;
+    Last = current_time;
+    
+    if (!ForceLiveProfiling && spent > 200 && LastFileName) {
+        debugs(1,1, LastFileName << ':' << LastLineNo << ": " <<
+            (LiveProfilingDepth-1) << "-spent: " << LastSpent << " (old)");
+        LastFileName = 0;
+    }
+
+    if (ForceLiveProfiling || spent > 200) {
+        debugs(1,1, fileName << ':' << lineNo << ": " << LiveProfilingDepth <<
+            "-spent: " << spent << " / " << Aggr << " or "
+            << tvSubUsec(LiveProfilingStart, current_time));
+        LastFileName = 0;
+    } else {
+        LastFileName = fileName;
+        LastLineNo = lineNo;
+        LastSpent = spent;
+	}
+}
+#endif
+
+
+#include "base/Profiling.cc"

=== modified file 'src/forward.cc'
--- src/forward.cc	2010-10-14 04:16:15 +0000
+++ src/forward.cc	2010-10-24 20:56:11 +0000
@@ -105,6 +105,7 @@
 // Called once, right after object creation, when it is safe to set self
 void FwdState::start(Pointer aSelf)
 {
+    ProfileProgress(Here);
     // Protect ourselves from being destroyed when the only Server pointing
     // to us is gone (while we expect to talk to more Servers later).
     // Once we set self, we are responsible for clearing it when we do not
@@ -116,6 +117,7 @@
 
     entry->registerAbort(FwdState::abort, this);
     peerSelect(request, entry, fwdStartCompleteWrapper, this);
+    ProfileProgress(Here);
 
     // TODO: set self _after_ the peer is selected because we do not need
     // self until we start talking to some Server.
@@ -199,6 +201,7 @@
 void
 FwdState::fwdStart(int client_fd, StoreEntry *entry, HttpRequest *request)
 {
+    ProfileProgress(Here);
     /** \note
      * client_addr == no_addr indicates this is an "internal" request
      * from peer_digest.c, asn.c, netdb.c, etc and should always
@@ -789,6 +792,7 @@
 void
 FwdState::connectStart()
 {
+    ProfileProgress(Here);
     const char *url = entry->url();
     int fd = -1;
     FwdServer *fs = servers;
@@ -960,6 +964,7 @@
 
     updateHierarchyInfo();
     commConnectStart(fd, host, port, fwdConnectDoneWrapper, this);
+    ProfileProgress(Here);
 }
 
 void
@@ -988,6 +993,7 @@
 void
 FwdState::dispatch()
 {
+    ProfileProgress(Here);
     peer *p = NULL;
     debugs(17, 3, "fwdDispatch: FD " << client_fd << ": Fetching '" << RequestMethodStr(request->method) << " " << entry->url() << "'" );
     /*
@@ -1051,6 +1057,7 @@
         }
     }
 #endif
+    ProfileProgress(Here);
 
     if (servers && (p = servers->_peer)) {
         p->stats.fetches++;

=== modified file 'src/http.cc'
--- src/http.cc	2010-09-28 17:43:42 +0000
+++ src/http.cc	2010-10-24 21:21:59 +0000
@@ -63,6 +63,43 @@
 #include "SquidTime.h"
 #include "Store.h"
 
+#if 0 // XXX: remove
+//#include "CodeTimer.h"
+timeval CodeTimerStart = { 0, 0};
+class CodeTimer {
+public:
+    CodeTimer(const char * const aStartName, const int aStartLine):
+              startName(aStartName), startLine(aStartLine)
+    {
+        if (!Depth++)
+            LastTime = CodeTimerStart;
+        tick(startName, startLine, "@beg ");
+    }
+    ~CodeTimer() { tick(startName, startLine, "@end "); --Depth; }
+
+    void tick(const char * const nowName, const int nowLine, const char * const prefix = "")
+    {
+        getCurrentTime();
+        int spent = -1;
+        if (LastTime.tv_sec > 0)
+            spent = tvSubUsec(LastTime, current_time);
+        else
+        if (CodeTimerStart.tv_sec > 0)
+            spent = tvSubUsec(CodeTimerStart, current_time);
+
+        if (CodeTimerStart.tv_sec > 0 || spent > 200)
+            debugs(1,1, nowName << ':' << nowLine << ": " << prefix << "spent: +" << spent);
+        LastTime = current_time;
+    }
+
+    const char * const startName;
+    const int startLine;
+    static timeval LastTime;
+    static int Depth;
+};
+timeval CodeTimer::LastTime = { 0, 0};
+int CodeTimer::Depth = 0;
+#endif
 
 #define SQUID_ENTER_THROWING_CODE() try {
 #define SQUID_EXIT_THROWING_CODE(status) \
@@ -85,6 +122,7 @@
         lastChunk(0), header_bytes_read(0), reply_bytes_read(0),
         body_bytes_truncated(0), httpChunkDecoder(NULL)
 {
+    ProfileProgress(Here);
     debugs(11,5,HERE << "HttpStateData " << this << " created");
     ignoreCacheControl = false;
     surrogateNoStore = false;
@@ -1077,6 +1115,8 @@
     int clen;
     int len = io.size;
 
+    ProfileProgress(Here);
+
     assert(fd == io.fd);
 
     flags.do_next_read = 0;
@@ -1115,6 +1155,7 @@
     // update I/O stats
     if (len > 0) {
         readBuf->appended(len);
+        ProfileProgress(Here);
         reply_bytes_read += len;
 #if DELAY_POOLS
 
@@ -1137,6 +1178,8 @@
             sent.tv_sec ? tvSubMsec(sent, current_time) : -1;
     }
 
+    ProfileProgress(Here);
+
     /** \par
      * Here the RFC says we should ignore whitespace between replies, but we can't as
      * doing so breaks HTTP/0.9 replies beginning with witespace, and in addition
@@ -1181,6 +1224,7 @@
         }
     }
 
+    ProfileProgress(Here);
     processReply();
 }
 
@@ -1190,6 +1234,8 @@
 HttpStateData::processReply()
 {
 
+    ProfileProgress(Here);
+
     if (flags.handling1xx) { // we came back after handling a 1xx response
         debugs(11, 5, HERE << "done with 1xx handling");
         flags.handling1xx = false;
@@ -1199,11 +1245,13 @@
     if (!flags.headers_parsed) { // have not parsed headers yet?
         PROF_start(HttpStateData_processReplyHeader);
         processReplyHeader();
+        ProfileProgress(Here);
         PROF_stop(HttpStateData_processReplyHeader);
 
         if (!continueAfterParsingHeader()) // parsing error or need more data
             return; // TODO: send errors to ICAP
 
+        ProfileProgress(Here);
         adaptOrFinalizeReply();
     }
 
@@ -1312,10 +1360,13 @@
 void
 HttpStateData::writeReplyBody()
 {
+    ProfileProgress(Here);
     truncateVirginBody(); // if needed
     const char *data = readBuf->content();
     int len = readBuf->contentSize();
+    ProfileProgress(Here);
     addVirginReplyBody(data, len);
+    ProfileProgress(Here);
     readBuf->consume(len);
 }
 
@@ -1352,6 +1403,8 @@
 void
 HttpStateData::processReplyBody()
 {
+    ProfileProgress(Here);
+
     AsyncCall::Pointer call;
     Ip::Address client_addr;
     bool ispinned = false;
@@ -1376,6 +1429,7 @@
      */
     if (flags.chunked) {
         if (!decodeAndWriteReplyBody()) {
+    ProfileProgress(Here);
             flags.do_next_read = 0;
             serverComplete();
             return;
@@ -1383,6 +1437,7 @@
     } else
         writeReplyBody();
 
+    ProfileProgress(Here);
     if (EBIT_TEST(entry->flags, ENTRY_ABORTED)) {
         /*
          * The above writeReplyBody() call could ABORT this entry,
@@ -1413,6 +1468,8 @@
             commSetTimeout(fd, -1, call);
             flags.do_next_read = 0;
 
+    ProfileProgress(Here);
+
             comm_remove_close_handler(fd, closeHandler);
             closeHandler = NULL;
             fwd->unregister(fd);
@@ -1436,6 +1493,7 @@
 
             fd = -1;
 
+            ProfileProgress(Here);
             serverComplete();
             return;
 
@@ -1445,6 +1503,7 @@
             return;
         }
 
+    ProfileProgress(Here);
     maybeReadVirginBody();
 }
 
@@ -2026,6 +2085,7 @@
                                   StoreEntry * sentry,
                                   MemBuf * mb)
 {
+    ProfileProgress(Here);
     const int offset = mb->size;
     HttpVersion httpver(1,1);
     mb->Printf("%s %s HTTP/%d.%d\r\n",
@@ -2037,6 +2097,7 @@
         HttpHeader hdr(hoRequest);
         Packer p;
         httpBuildRequestHeader(aRequest, original_request, sentry, &hdr, flags);
+        ProfileProgress(Here);
 
         if (aRequest->flags.pinned && aRequest->flags.connection_auth)
             aRequest->flags.auth_sent = 1;
@@ -2045,11 +2106,14 @@
 
         packerToMemInit(&p, mb);
         hdr.packInto(&p);
+        ProfileProgress(Here);
         hdr.clean();
         packerClean(&p);
+        ProfileProgress(Here);
     }
     /* append header terminator */
     mb->append(crlf, 2);
+    ProfileProgress(Here);
     return mb->size - offset;
 }
 
@@ -2057,6 +2121,7 @@
 bool
 HttpStateData::sendRequest()
 {
+    ProfileProgress(Here);
     MemBuf mb;
 
     debugs(11, 5, "httpSendRequest: FD " << fd << ", request " << request << ", this " << this << ".");
@@ -2074,6 +2139,7 @@
     flags.do_next_read = 1;
     maybeReadVirginBody();
 
+    ProfileProgress(Here);
     if (orig_request->body_pipe != NULL) {
         if (!startRequestBodyFlow()) // register to receive body data
             return false;
@@ -2128,12 +2194,16 @@
         flags.front_end_https = _peer->front_end_https;
     }
 
+    ProfileProgress(Here);
+
     mb.init();
     request->peer_host=_peer?_peer->host:NULL;
     buildRequestPrefix(request, orig_request, entry, &mb);
+    ProfileProgress(Here);
     debugs(11, 6, "httpSendRequest: FD " << fd << ":\n" << mb.buf);
     comm_write_mbuf(fd, &mb, requestSender);
 
+    ProfileProgress(Here);
     return true;
 }
 

=== modified file 'src/mem.cc'
--- src/mem.cc	2010-10-14 04:16:15 +0000
+++ src/mem.cc	2010-10-23 00:49:16 +0000
@@ -488,7 +488,11 @@
 memClean(void)
 {
     MemPoolGlobalStats stats;
-    MemPools::GetInstance().setIdleLimit(0);
+
+// XXX: restore
+//    MemPools::GetInstance().setIdleLimit(0);
+// XXX
+
     MemPools::GetInstance().clean(0);
     memPoolGetGlobalStats(&stats);
 

=== modified file 'src/peer_select.cc'
--- src/peer_select.cc	2010-01-30 00:30:56 +0000
+++ src/peer_select.cc	2010-10-24 20:48:36 +0000
@@ -126,6 +126,7 @@
            PSC * callback,
            void *callback_data)
 {
+    ProfileProgress(Here);
     ps_state *psstate;
 
     if (entry)
@@ -180,6 +181,7 @@
 static void
 peerSelectCallback(ps_state * psstate)
 {
+    ProfileProgress(Here);
     StoreEntry *entry = psstate->entry;
     FwdServer *fs = psstate->servers;
     PSC *callback;
@@ -261,6 +263,7 @@
 static void
 peerSelectFoo(ps_state * ps)
 {
+    ProfileProgress(Here);
     StoreEntry *entry = ps->entry;
     HttpRequest *request = ps->request;
     debugs(44, 3, "peerSelectFoo: '" << RequestMethodStr(request->method) << " " << request->GetHost() << "'");
@@ -305,6 +308,7 @@
         debugs(44, 3, "peerSelectFoo: direct = " << DirectStr[ps->direct]);
     }
 
+    ProfileProgress(Here);
     if (!entry || entry->ping_status == PING_NONE)
         peerSelectPinned(ps);
     if (entry == NULL) {
@@ -345,6 +349,7 @@
     }
 
     peerSelectCallback(ps);
+    ProfileProgress(Here);
 }
 
 /*
@@ -762,6 +767,7 @@
 static void
 peerAddFwdServer(FwdServer ** FSVR, peer * p, hier_code code)
 {
+    ProfileProgress(Here);
     FwdServer *fs = (FwdServer *)memAllocate(MEM_FWD_SERVER);
     debugs(44, 5, "peerAddFwdServer: adding " <<
            (p ? p->host : "DIRECT")  << " " <<
@@ -773,6 +779,7 @@
         FSVR = &(*FSVR)->next;
 
     *FSVR = fs;
+    ProfileProgress(Here);
 }
 
 void *

=== modified file 'src/stmem.cc'
--- src/stmem.cc	2009-03-06 13:26:57 +0000
+++ src/stmem.cc	2010-10-24 16:24:19 +0000
@@ -133,6 +133,7 @@
 size_t
 mem_hdr::writeAvailable(mem_node *aNode, int64_t location, size_t amount, char const *source)
 {
+    ProfileProgress(Here);
     /* if we attempt to overwrite existing data or leave a gap within a node */
     assert (location == aNode->nodeBuffer.offset + (int64_t)aNode->nodeBuffer.length);
     /* And we are not at the end of the node */
@@ -142,7 +143,9 @@
     assert (location - aNode->nodeBuffer.offset == (int64_t)aNode->nodeBuffer.length);
     size_t copyLen = min(amount, aNode->space());
 
+    ProfileProgress(Here);
     xmemcpy(aNode->nodeBuffer.data + aNode->nodeBuffer.length, source, copyLen);
+    ProfileProgress(Here);
 
     if (inmem_hi <= location)
         inmem_hi = location + copyLen;
@@ -157,6 +160,7 @@
 mem_hdr::appendNode (mem_node *aNode)
 {
     nodes.insert (aNode, NodeCompare);
+    ProfileProgress(Here);
 }
 
 void
@@ -325,10 +329,13 @@
 mem_node *
 mem_hdr::nodeToRecieve(int64_t offset)
 {
+    ProfileProgress(Here);
+
     /* case 1: Nothing in memory */
 
     if (!nodes.size()) {
         appendNode (new mem_node(offset));
+        ProfileProgress(Here);
         return nodes.start()->data;
     }
 
@@ -344,6 +351,7 @@
             candidate = *leadup;
     }
 
+    ProfileProgress(Here);
     if (candidate && candidate->canAccept(offset))
         return candidate;
 
@@ -351,6 +359,7 @@
     candidate = new mem_node(offset);
 
     appendNode (candidate);
+    ProfileProgress(Here);
 
     /* simpler to write than a indented if */
     return candidate;
@@ -360,6 +369,7 @@
 bool
 mem_hdr::write (StoreIOBuffer const &writeBuffer)
 {
+    ProfileProgress(Here);
     PROF_start(mem_hdr_write);
     debugs(19, 6, "mem_hdr::write: " << writeBuffer.range() << " object end " << endOffset());
 
@@ -371,6 +381,7 @@
         return false;
     }
 
+    ProfileProgress(Here);
     assert (writeBuffer.offset >= 0);
 
     mem_node *target;
@@ -386,6 +397,7 @@
         currentSource += wrote;
     }
 
+    ProfileProgress(Here);
     PROF_stop(mem_hdr_write);
     return true;
 }

=== modified file 'src/store.cc'
--- src/store.cc	2010-10-14 04:16:15 +0000
+++ src/store.cc	2010-10-25 04:03:29 +0000
@@ -379,6 +379,7 @@
 void
 StoreEntry::destroyMemObject()
 {
+    ProfileScope(__FILE__, __LINE__, 0);
     debugs(20, 3, HERE << "destroyMemObject " << mem_obj);
     setMemStatus(NOT_IN_MEMORY);
     MemObject *mem = mem_obj;
@@ -389,6 +390,7 @@
 void
 destroyStoreEntry(void *data)
 {
+    ProfileScope(__FILE__, __LINE__, 0);
     debugs(20, 3, HERE << "destroyStoreEntry: destroying " <<  data);
     StoreEntry *e = static_cast<StoreEntry *>(static_cast<hash_link *>(data));
     assert(e != NULL);
@@ -410,6 +412,7 @@
 void
 StoreEntry::hashInsert(const cache_key * someKey)
 {
+    ProfileScope(__FILE__, __LINE__, 0);
     debugs(20, 3, "StoreEntry::hashInsert: Inserting Entry " << this << " key '" << storeKeyText(someKey) << "'");
     key = storeKeyDup(someKey);
     hash_join(store_table, this);
@@ -418,6 +421,7 @@
 void
 StoreEntry::hashDelete()
 {
+    ProfileScope(__FILE__, __LINE__, 0);
     hash_remove_link(store_table, this);
     storeKeyFree((const cache_key *)key);
     key = NULL;
@@ -813,6 +817,8 @@
 void
 StoreEntry::write (StoreIOBuffer writeBuffer)
 {
+    ProfileProgress(Here);
+
     assert(mem_obj != NULL);
     assert(writeBuffer.length >= 0);
     /* This assert will change when we teach the store to update */
@@ -835,8 +841,11 @@
 
     debugs(20, 5, "storeWrite: writing " << writeBuffer.length << " bytes for '" << getMD5Text() << "'");
     PROF_stop(StoreEntry_write);
+    ProfileProgress(Here);
     storeGetMemSpace(writeBuffer.length);
+    ProfileProgress(Here);
     mem_obj->write (writeBuffer, storeWriteComplete, this);
+    ProfileProgress(Here);
 }
 
 /* Append incoming data from a primary server to an entry. */
@@ -1791,7 +1800,21 @@
 
     assert (isEmpty());
 
-    getReply()->packHeadersInto(&p);
+    // Pack header into a temporary buffer and then write the whole buffer
+    // at once to avoid hundreds of calls to mem_hdr::write() per header.
+    // While each call is relatively cheap, it is death by thousands cuts.
+    {
+        MemBuf hdrBuf;
+        hdrBuf.init();
+
+        Packer hdrPacker;
+        packerToMemInit(&hdrPacker, &hdrBuf);
+        getReply()->packHeadersInto(&hdrPacker);
+        packerClean(&hdrPacker);
+
+        if (hdrBuf.contentSize())
+            packerAppend(&p, hdrBuf.content(), hdrBuf.contentSize());
+	}
 
     rep->hdr_sz = mem_obj->endOffset();
 

=== modified file 'src/store_client.cc'
--- src/store_client.cc	2010-04-07 09:49:57 +0000
+++ src/store_client.cc	2010-10-25 16:32:34 +0000
@@ -444,8 +444,11 @@
     /* What the client wants is in memory */
     /* Old style */
     debugs(90, 3, "store_client::doCopy: Copying normal from memory");
+    ProfileProgress(Here);
     size_t sz = entry->mem_obj->data_hdr.copy(copyInto);
+    ProfileProgress(Here);
     callback(sz);
+    ProfileProgress(Here);
     flags.store_copying = 0;
 }
 

=== modified file 'src/tools.cc'
--- src/tools.cc	2010-09-23 17:58:15 +0000
+++ src/tools.cc	2010-10-20 16:35:09 +0000
@@ -424,7 +424,7 @@
 
     if (state == 0) {
 #if !MEM_GEN_TRACE
-        Debug::parseOptions("ALL,7");
+        Debug::parseOptions("ALL,9");
 #else
 
         log_trace_done();


