User:Hagman/mm18


 * 1) define VERSION   "3.0"

//#define PROFILE //#define ASM
 * 1) define NDEBUG

using namespace std;
 * 1) include 
 * 2) include 
 * 3) include 
 * 4) include
 * 5) include
 * 6) include
 * 7) include
 * 8) include
 * 9) include
 * 10) include 

/*
 Minimal stand-in for posix_memalign, built on plain malloc.

 pptr      receives the address of the new block (NULL on failure)
 alignment is accepted for interface compatibility but NOT enforced:
           the block only has whatever alignment malloc provides
 size      number of bytes requested

 Returns 0 on success and a non-zero error code if the allocation
 failed, so that callers checking the result do not go on to use a
 NULL block.
*/
int posix_memalign(void** pptr, size_t alignment, size_t size) {
    (void)alignment;               // cannot be honoured by malloc
    void* block = malloc(size);
    *pptr = block;
    // malloc(0) may legitimately return NULL, so only a failed
    // non-empty request counts as an error.
    if (!block && size != 0)
        return 12;                 // numeric value of ENOMEM on common platforms
    return 0;
}
 * 1) ifdef WIN32
 * 2) include 
 * 3) define ATTRIBUTE(x)
 * 4) define INT64    __int64
 * 5) define INT64FMT "I64"
 * 1) else
 * 2) include 
 * 3) define ATTRIBUTE(x) __attribute__ (x)
 * 4) define INT64    long long
 * 5) define INT64FMT "ll"
 * 6) endif


 * 1) ifdef NDEBUG
 * 2) define DEBOUT(args)    assert(true)
 * 3) define DEBOUT10(args)    assert(true)
 * 4) define DEBOUTLONGSTRING(args) assert(true)
 * 5) define DEBOUTLONGSTRING10(args) assert(true)
 * 6) else
 * 7) define DEBOUT(args) Deb.DoDebOut args
 * 8) define DEBOUT10(args) do { static int _a=0; if (++_a<20) Deb.DoDebOut args; } while(0)
 * 9) define DEBOUTLONGSTRING(args) Deb.DoDebOutString args
 * 10) define DEBOUTLONGSTRING10(args) do { static int _a=0; if (++_a<20) Deb.DoDebOutString args; } while(0)
 * 11) endif


 * 1) define CLR(v) memset(v,0,sizeof(v))


 * 1) define MILLION 1000000

/* Class for debugging output and time measurement class Debug { private: int firsttime; public: Debug { DecreaseTime(musecs); }   void DecreaseTime(int mus) {       firsttime += mus; }   int musecs { int thistime; thistime = GetTickCount*91; struct timeval now; gettimeofday(&now,NULL); thistime = now.tv_sec*MILLION + now.tv_usec; return thistime - firsttime; }
 * 1) ifdef WIN32
 * 1) else
 * 1) endif

void DoDebOut (const char* fmt, ...) ATTRIBUTE ((format (printf, 2, 3))); void DoDebOutString (const char* fmt, const char* s); };
 * 1) ifndef NDEBUG
 * 1) endif

/* For profiling of up to 100 different sections. - Insert TIMER(name) before and after section (at all exits!) - Call Profiler::Print at the end namespace Profiler { typedef struct { INT64 time; int count; const char* Name; } tProf; tProf Profile[100]; tProf* MaxId = Profile; tProf* Name2Id(const char* s) { for (tProf* i=Profile; i< MaxId; ++i) if (!strcmp(s,i->Name)) return i;       MaxId->time  = 0; MaxId->count = 0; MaxId->Name = s;        return MaxId++; }   void Print { for (tProf* i=Profile; itime > 0) { if (i->count && !(i->count&1)) fprintf(stderr,"%20s : %16" INT64FMT "d / %8d -> %16" INT64FMT "d\n",                    i->Name,i->time,i->count/2,i->time/(i->count/2)); else fprintf(stderr,"%20s : Illegal count == %d\n", i->Name, i->count); } else { fprintf(stderr,"%20s :                   %8d\n",i->Name,i->count); }   }  }
 * 1) ifdef PROFILE

INT64 RdT { __asm { rdtsc; } }   static Profiler::tProf *atime=NULL; \ if (!atime) atime=Profiler::Name2Id( #name ); \ INT64 now; RdTsC(now); \ atime->time = now - atime->time; ++atime->count; } while (0) static Profiler::tProf *atime=NULL; \ if (!atime) atime=Profiler::Name2Id( #name ); \ ++atime->count; } while (0)
 * 1) ifdef WIN32
 * 1)  define RdTsC(now)    now=RdT
 * 2) else
 * 3)  define RdTsC(now)    asm volatile("rdtsc" : "=A"(now) )
 * 4) endif
 * 5) define TIMER(name) do { \
 * 1) define COUNTER(name) do { \
 * 1) else
 * 2) define TIMER(foo)    assert(true)
 * 3) define COUNTER(foo)    assert(true)
 * 4) endif

/*
 printf-style debug output to stdout.
 Every message is prefixed with the elapsed time reported by musecs(),
 formatted as "seconds.microseconds: ", and stdout is flushed afterwards.
 The TIMER(DEBUG) calls on entry and exit bracket the routine for the
 profiler.
*/
void Debug::DoDebOut (const char* fmt, ...) {
    TIMER(DEBUG);
    int t = musecs();
    // Print the time stamp separately instead of pasting the caller's
    // format into a fixed 256 byte buffer, which overflowed for long formats.
    printf("%2d.%06d: ", t/MILLION, t%MILLION);
    va_list marker;
    va_start(marker,fmt);
    vprintf(fmt,marker);
    va_end(marker);        // every va_start needs a matching va_end
    fflush(stdout);
    TIMER(DEBUG);
}
 * 1) ifndef NDEBUG

void Debug::DoDebOutString (const char* fmt, const char* s) { TIMER(DEBUG); int L = strlen(s); if (L>LONGSTR1+LONGSTR2 +3) { char buf[LONGSTR1+LONGSTR2+4]; memcpy(buf,s,LONGSTR1); sprintf(buf+LONGSTR1,"...%s",s+L-LONGSTR2); s = buf; }   char buf[256]; int t = musecs; sprintf(buf,"%2d.%06d: %s",t/MILLION,t%MILLION,fmt); printf(buf,s); fflush(stdout); TIMER(DEBUG); } Debug Deb;
 * 1) define LONGSTR1    100
 * 2) define LONGSTR2    10
 * 1) endif

using namespace std;

/* We use hashs based on four letter sequences to define a linked list of occurrences int Hash[HASHSIZE];
 * 1) define HASHSIZE 999997
 * 1) define HASHSIZE 999997

/* "Len" Characters staring at offset "srcA" in source string are identical as the corresponding characters starting at offset "dstA" in target string. class M_Block { public: int srcA,dstA,Len; // roud up to 16 byte for speed? public: M_Block(int src, int dst, int L):srcA(src),dstA(dst),Len(L) {} bool contains(const M_Block& o) const { return dstA<=o.dstA && dstA+Len>=o.dstA+o.Len; }   bool operator <(const M_Block& o) const { return dstAo.Len || (Len==o.Len && srcA<o.srcA))); } };

/*
 Some things are also used outside the main class.
 Could be static members instead.
*/
int GlobalBasecost;
const char* GlobalFinalstr;
/*
 For building M/I/D sequences
*/
char* GlobalScratchpad;

/* Calculate actual cost of e.g. "0-10 99-999 MMMIIDIDMID.." int StringCost(const char* s) { TIMER(StringCost); int ac = 0; for { switch (*s++) { case 'D': case 'I': ac++; break; case ' ': ac+=GlobalBasecost; break; case 0: TIMER(StringCost); return ac; }   } }

/* Fingerprint of strings (to simplify revert detection). Should probably use CRC32 instead. unsigned StringHash(const string& s0) {   TIMER(StringHash); unsigned int *p = (unsigned int *)s0.c_str; unsigned a = s0.length, b=0, c=0, d=0; unsigned int *q = p + ((a>>2)&~3); for (p3) wri += sprintf(wri,"%d ",count); if (!(*wri++ = c = *s)) return buf; count = 1; }   } }
 * 1) ifndef NDEBUG
 * 1) endif

/* Use to build M/I/D string from source and destination class Writer { private: char *wri; const char * src; const char * dst; int M,I,D; public: int InsertStats const { return I;} int DeleteStats const { return D;} int MoveStats const { return M;} Writer: wri(GlobalScratchpad), src(NULL), dst(GlobalFinalstr),M(0),I(0),D(0) {} // To switch to a new source block void SetSource(const char* s) { src=s; }

// Is 'M' possible? bool CanMove const { return *src==*dst; }   void Move   { *wri++='M'; assert(*src==*dst); ++src; ++dst; ++M; }   void Insert { *wri++='I'; ++dst; ++I; }   void Delete { *wri++='D'; ++src; ++D; }   // finish by adding 'I' as necessary and terminate with '\0' void Close(const char* end) { assert(dst<=end); while (dst<end) Insert; *wri = 0; }   bool SourceBefore(const char* s) const { return src<s; }   bool DestBefore(const char* d) const { return dst<d; } };

class MI_Block { const char *Src0, *Src1, *Dst0, *Dst1; public: const char* srcbegin const { return Src0; } const char* srcend const { return Src1; } const char* dstbegin const { return Dst0; } const char* dstend const { return Dst1; } /*    Calculate the cost of this block. This is based on the assumption that only 'M' and 'I' are used. M_Blocks with 'D' are of class MID_Block */   int getCost const { return GlobalBasecost + (Dst1-Dst0)-(Src1-Src0);} MI_Block: Src0(NULL), Src1(NULL), Dst0(NULL), Dst1(NULL) {} MI_Block(const char* src, const char*dst, int len): Src0(src),Src1(src+len),Dst0(dst),Dst1(dst+len) {} /*    Find preceeding source character in destination and extend accoringly (possibly with several cnosecutive chars). Returns characters added (to destination) */   int GrowFront(const char* s0, const char* d0) {       TIMER(GrowFront); assert(Src0); assert(Dst0); assert(s0); assert(d0); if (Src0==s0 || Dst0==d0) { TIMER(GrowFront); return false; }       char c=Src0[-1]; const char* d=Dst0-1; while (d>d0 && *d!=c) --d; if (*d!=c) { TIMER(GrowFront); return 0; }       Src0--; Dst0=d; while (Src0>s0 && Dst0>d0 && Src0[-1]==Dst0[-1]) { --Src0; --Dst0; }       TIMER(GrowFront); return d-Dst0+1; }   /*     Same at end (returns only success/failure) */   bool GrowBack {       TIMER(GrowBack); assert(Src1); assert(Dst1); char c=*Src1; if (!c) { TIMER(GrowBack); return false; } const char*d=Dst1; while (*d && *d!=c) ++d; if (*d!=c) { TIMER(GrowBack); return false; } Dst1=d; while (*Dst1==*Src1 && *Dst1) { ++Src1; ++Dst1; }       TIMER(GrowBack); return true; }   /*     If next block output would overlap, shorten this one at the right end. In theory, might produce unexpected improvement, aka. "unrequested fission surplus" */   void ChopEndTo(const char *d) {       TIMER(ChopEndTo); assert(d<=Dst1); assert(d>Dst0); while (dSrc0); --Src1; }           --Dst1; }       TIMER(ChopEndTo); } };

/* A sequence of MI_Blocks that can be connected with one or more 'D' class MID_Block { private: vector  Parts; public: // Start with at least one MI_Block MID_Block(const MI_Block& B) { Parts.push_back(B); }   // Add another MI_Block. New block must be *left* of all previous void Join(const MI_Block& GB) { Parts.push_back(GB); }   /*     Determine if a block can and should be added. It must be to the left of all previous blocks and a greedily constructed M/I/D sequence to glue them must not outweigh the saving made. */   bool WorthJoining(const MI_Block& GB) { const char *S0=Parts.rbegin->srcbegin, *D0 = Parts.rbegin->dstbegin; const char *s = GB.srcend, *d = GB.dstend; int srcdist = S0 - s;       int dstdist = D0 - d;        assert(dstdist >= 0); if (srcdist < 0 ) return false; if (dstdist+srcdist < GlobalBasecost) return true; if (abs(dstdist-srcdist) >= GlobalBasecost) return false; int cost = 0; while (ssrcbegin - base,             Parts.begin->srcend -base-1); return buf; }   /*     Produce the M/I/D sequence. Between blocks, use greedy algorithm for 'M' and 'D', within blocks, use greedy algorithm for 'M' and 'I'. */   void emit(Writer& W) const {       TIMER(M::emit); vector ::const_reverse_iterator i = Parts.rbegin; W.SetSource(i->srcbegin); for (i!=Parts.rend; ++i) { assert(W.SourceBefore(i->srcbegin +1)); while (W.SourceBefore(i->srcbegin)) { if (W.CanMove && W.DestBefore(i->dstbegin)) W.Move; else W.Delete; }           while (W.SourceBefore(i->srcend)) { if (W.CanMove) W.Move; else W.Insert; }           assert(W.DestBefore(i->dstend +1)); }       TIMER(M::emit); } };

/*
 One data point of the stair function together with the block that
 produced it: X is a position in the destination string, Y the value
 stored for that position, and reason the MI_Block passed in when the
 point was created.
*/
class StairReason {
public:
    const char* X;
    int Y;
    MI_Block reason;
    StairReason(const char* x, int y, const MI_Block &r): X(x),Y(y),reason(r) {}
    // Default construction yields a well-defined empty point instead of
    // leaving X and Y indeterminate.
    StairReason(): X(NULL), Y(0) {}
};


 * 1) define QUICKSTAIRS


 * 1) ifdef QUICKSTAIRS

class tStuetzen { private: static StairReason *Data; StairReason* End; public: tStuetzen { /*	 Make sure Data is aligned (as a preliminary measure towards SSE, see below). We just hope that there will never be more than 1e6 data points. This is more than reasonable as Martial Arts calls insert 13e6 times spread over all >1500 versions, so there are probably always *by* *far* less than 1e4 data points. */       if (!Data) { posix_memalign((void**)&(Data), 16, (MILLION)*sizeof(*Data)); }       End = Data; }   typedef StairReason *iterator; typedef const StairReason *const_iterator; iterator end const { return End; } iterator begin const { return Data; } iterator rbegin const { return End-1; } int size const {return End-Data; } void push_back(const StairReason& A) { *End++ = A; } void insert(iterator before, const StairReason& X) { /*	 Trying to squeeze a cpu cycle might be worthwhile, as (e.g in MartialArts) insert might execute 13e6 times. The memmove version costs ~933 cycles, which sums up to ~6% of total run time. The asm version with SSE *should* work, but somehow doesn't.	*/ asm volatile (       " movdqu (%2),%%xmm0;"        "1: movdqa (%0),%%xmm1;"        " movdqa %%xmm0,(%0);"        " movdqa %%xmm1,%%xmm0;"        " add $16,%0;"        " cmp %0,%1;"        " jnb 1b;"        : "=r"(End)        : "r"(End), "r"(&X), "0"(before)        : "memory"        ); memmove(before+1,before,(char*)(End++)- (char*)before); memmove(before,&X,sizeof(StairReason)); }   void erase(iterator first, iterator after) {	/*	 Cf. insert routine above. 
memmove version has ~479 cycles, contributes ~3% */       asm volatile (        " sub %1,%2 ;"        " jnb 1f;"        "2: movdqa (%1),%%xmm0;"        "   movdqa %%xmm0,(%2,%1);"        "   add $16,%1;"        "   cmp %1,%0;"        "   jne 2b;"        "   add %2,%0;"        "1: "        : "=r"(End)        : "r"(after), "r"(first), "0"(End)        : "memory"        ); if (first tStuetzen; tStuetzen Stuetzen; MI_Block Dummy; mutable int oldM; public: StairFunc: oldM(MILLION) { Stuetzen.push_back(StairReason(GlobalFinalstr,0,Dummy)); }   bool Restrict(const MI_Block &R0) { TIMER(Restrict); TIMER(Restrict Prep); assert(R0.dstend); const char * x0 = R0.dstend; int y0 = (*this)[R0.dstbegin] + R0.getCost; // insert (x0,y0) unless ex. (x,y) with y0-y>=x0-x && y0>=y // then remove all (x,y) with y-y0>=x-x0 && y>=y0 TIMER(Restrict Prep); TIMER(Restrict Find); tStuetzen::iterator before,after = Stuetzen.end; --after; for (after->Y > y0 && after->Y - y0 < after->X - x0; --after); if (after->Y <= y0 && after->Y - y0 <= after->X - x0) { TIMER(Restrict Find); TIMER(Restrict); return false; }           for (before = after; before->Y >= y0; --before); TIMER(Restrict Find);
 * 1) ifndef QUICKSTAIRS
 * 1) endif

if (before == after) { TIMER(Restrict Ins); Stuetzen.insert(before+1,StairReason(x0,y0,R0)); TIMER(Restrict Ins); } else { TIMER(Restrict Chg); *++before = StairReason(x0,y0,R0); TIMER(Restrict Chg); TIMER(Restrict Del); ++before; ++after; Stuetzen.erase(before,after); TIMER(Restrict Del); }           TIMER(Restrict); return true; }   /*	Calculate the stairfunc at a given position x    */ int operator[] (const char* x) const { assert(x); TIMER(at); /*	 About 1% of all calls are to the left of the first data point. */       if (x <= Stuetzen[0].X) { COUNTER(at first); TIMER(at); return Stuetzen[0].Y;       } int A = 0; int B = Stuetzen.size-1; /*	 This is fairly rare (less than .1%), but needs special treatment */       if (x >= Stuetzen[B].X) { COUNTER(at last); TIMER(at); return Stuetzen[B].Y + (x-Stuetzen[B].X); }	{		int M;		if (oldM>1; while (B-A > 1) { int ofs = Stuetzen[M].X - x;	           if (ofs > 0) B = M;	           else if (ofs<0) A = M;	           else { /*			 About 5% of queries hit exactly, about 85% of these hit the same spot again! */ 			COUNTER(at hit); if (M==oldM) COUNTER(at hit old); TIMER(at); return Stuetzen[oldM=M].Y;	           } M = (A+B)>>1; }	}	/*	 Generic case: Somewhere between data points */	COUNTER(at generic); int y = Stuetzen[A].Y + (x-Stuetzen[A].X); if (Stuetzen[B].Y < y) y = Stuetzen[B].Y;       TIMER(at); return y;   } // int operator[] (int x) const { return operator[](GlobalFinalstr+x); } const MI_Block& supp(int x) const { return supp(GlobalFinalstr + x); } // The last reason responsible for cost at x   const MI_Block& supp(const char* x) const { assert(x); if (x<=GlobalFinalstr) return Dummy; TIMER(supp); if (x >= Stuetzen.rbegin->X) { TIMER(supp); return Stuetzen.rbegin->reason; }       tStuetzen::const_iterator is = Stuetzen.begin; while ((x - is->X) > 0) ++is; tStuetzen::const_iterator is2 = is--; TIMER(supp); if ( x-is->X > is2->Y - is->Y) return is2->reason; else return is->reason; } };

class BlockEditDistance { private: int N;   int FinalLen; int* OfsLink; int totalchars; int TimeLimit; int TotalSaving; int Minimum_M_BlockSize; int GrowBackLimit; static int HashFunc(unsigned int x)   { return x % HASHSIZE; }   double Rate(int musecs = Deb.musecs) const {       return totalchars/(0.001*musecs); }   double TimeFactor(int musecs = Deb.musecs) const {       double rateX; if (totalchars<100000) rateX = 100000/(0.001*musecs); else rateX = Rate(musecs); return 1.0/(1+exp(4-rateX/200)); }   void PrepareFinal {       memset(Hash,-1,sizeof(Hash)); OfsLink = new int[FinalLen]; //memset(OfsLink,-1,sizeof(*OfsLink)*FinalLen); for (int i=0; i+3 &M_BlockCand) const {       TIMER(Collect_M_Blocks); for (int i=0; i+3<chars; i+=4) { unsigned int c4 = *(const unsigned int*)(str+i); for (int hofs = Hash[HashFunc(c4)]; hofs>=0; hofs=OfsLink[hofs]) { if (c4 != *(const unsigned int*)(GlobalFinalstr+hofs)) continue; // Hash collision if (i>3 && hofs>3 &&                    *(const unsigned int*)(str+i-4)==*(const unsigned int*)(GlobalFinalstr+hofs-4)) continue; // seen before int ipre=i, jpre=hofs; while (ipre && jpre && str[ipre-1]==GlobalFinalstr[jpre-1]) { --ipre; --jpre; }               int rest1 = FinalLen - hofs +i; const char* Wall = str + (rest1<chars? 
rest1 : chars) -4; const char *R1 = str+i+4, *R2 = GlobalFinalstr + hofs+4; while (R1 <= Wall && *(const unsigned*)R1==*(const unsigned*)R2) { R1+=4; R2+=4; }               Wall += 4; while (R1 < Wall && *R1==*R2) { R1++; R2++; }               int len = (R1-str)-ipre; if (len >= Minimum_M_BlockSize) { M_BlockCand.push_back(M_Block(ipre,jpre,len)); }           }        }        TIMER(Collect_M_Blocks); }   string gobble(const string& S, int &Saving) {       TIMER(gobble); const char* str = S.c_str; int chars = S.length; vector <M_Block> M_BlockCand; Collect_M_Blocks(str,chars, M_BlockCand); DEBOUT10(("Found %d blocks\n", M_BlockCand.size )); sort(M_BlockCand.begin, M_BlockCand.end); vector <M_Block>::iterator pb; StairFunc Cost; for (pb=M_BlockCand.begin; pb!=M_BlockCand.end; ++pb) { MI_Block GB(str + pb->srcA, GlobalFinalstr + pb->dstA, pb->Len); int unused=1; int unusedcount = 0; int growstep; do { if (Cost.Restrict(GB)) { unused = 0; } else if ((unusedcount += unused)>4) { COUNTER(unusedcount 4); break; }           } while ((growstep = GB.GrowFront(str,GlobalFinalstr))>0 && growstep<Minimum_M_BlockSize); if (unused) continue; GB = Cost.supp(GB.dstend); int gbcount=0; while (GB.GrowBack) { Cost.Restrict(GB); if (gbcount++>GrowBackLimit) { COUNTER(GrowBackLimit); break; }           }        }        DEBOUT10(("Cost estimation %d/%d\n", Cost[FinalLen], FinalLen)); const char *x = GlobalFinalstr + FinalLen; vector <MID_Block> UsedBlocks; while (x>GlobalFinalstr) { MI_Block GB = Cost.supp(x); if (!GB.srcbegin) break; if (GB.dstend > x)               GB.ChopEndTo(x); if (UsedBlocks.size && UsedBlocks.rbegin->WorthJoining(GB) ) UsedBlocks.rbegin->Join(GB); else UsedBlocks.push_back(GB); x = GB.dstbegin; }       Writer MID; string result_A; Saving = 0; for (vector <MID_Block>::reverse_iterator igb = UsedBlocks.rbegin; igb != UsedBlocks.rend; igb++) {           result_A += igb->IntervalString(str); igb->emit(MID); Saving += GlobalBasecost; }       MID.Close(GlobalFinalstr 
+ FinalLen); DEBOUTLONGSTRING10(("MID=\"%s\"\n", zip(GlobalScratchpad))); Saving += MID.InsertStats + MID.DeleteStats; DEBOUTLONGSTRING10(("%s\n",result_A.c_str )); DEBOUT10(("%d x M, %d x I, %d x D\n", MID.MoveStats, MID.InsertStats, MID.DeleteStats )); TIMER(gobble); return result_A + GlobalScratchpad; } public: BlockEditDistance { DEBOUT(("Version %s\n", VERSION)); int IgnoreTime = Deb.musecs-10; if (IgnoreTime>0) { Deb.DecreaseTime(IgnoreTime); DEBOUT(("%g ms external preparation\n", IgnoreTime*.001)); }   }    vector transform(const vector &versions, int _B) { TIMER(Total); GlobalBasecost = _B; Minimum_M_BlockSize = GlobalBasecost +1; GrowBackLimit = MILLION; N = versions.size; const string& final = versions[N-1]; GlobalFinalstr = final.c_str; FinalLen = final.length; GlobalScratchpad = new char[FinalLen*2]; TotalSaving = 0; totalchars = 0; vector ::const_iterator pver; for (pver=versions.begin; pver != versions.end; ++pver) { totalchars += pver->length; }       TimeLimit = totalchars*(MILLION/500000); if (TimeLimit > 60*MILLION) TimeLimit = 60*MILLION; if (TimeLimit < 5*MILLION) TimeLimit = 5*MILLION; DEBOUT(("transform(%d x %d..%d, %d) %d chars in %gs\n", versions.size, versions[0].size, FinalLen, GlobalBasecost, totalchars,TimeLimit/(double)MILLION)); PrepareFinal; DEBOUTLONGSTRING(("Final <%s> prepared\n",GlobalFinalstr)); for { vector <M_Block> Self_M_Blocks; Collect_M_Blocks(GlobalFinalstr, FinalLen, Self_M_Blocks); DEBOUT(("Found %d self-blocks\n",Self_M_Blocks.size )); if (Self_M_Blocks.size < 1000) break; Minimum_M_BlockSize ++; if (Self_M_Blocks.size > 2000) ++Minimum_M_BlockSize; DEBOUT(("Increased min block size to %d\n", Minimum_M_BlockSize)); }       vector result(N-1,""); int baseline = FinalLen * (N-1); string emergency(FinalLen,'I');

TIMER(RevertQuest); set <pair <int,int> > TaskQ; vector DoAfter(N,-1); {           map <unsigned,pair<int,int> > RevertHashMap; for (int i=0; i<N-1; ++i) { unsigned H = StringHash(versions[i]); map <unsigned,pair<int,int> >::iterator where = RevertHashMap.find(H); if (where == RevertHashMap.end) { //DoAfter[i] = -1; if (i<N-1) RevertHashMap.insert(                           pair<unsigned,pair<int,int> > (H,pair<int,int>(1,i))                        ); } else { if (i<N-1) { DoAfter[i] = where->second.second; where->second.second = i;                       where->second.first ++; } else where->second.first += 10; }           }

for (map <unsigned,pair<int,int> >::iterator vals = RevertHashMap.begin;           vals != RevertHashMap.end; ++vals) { TaskQ.insert(vals->second); }       }        TIMER(RevertQuest); DEBOUT10(("%d Tasks\n",TaskQ.size)); TIMER(Tasking); bool PANIC = false; queue< pair <double,int> > ScoreQueue; int togo = N-1; for (set <pair <int,int> >::reverse_iterator task = TaskQ.rbegin; task!=TaskQ.rend; ++task) { int k = task->second; if (PANIC) { result[k] = emergency; --togo; continue; }           DEBOUT10(("Calc #%d\n",k)); int Saving=0; result[k] = gobble(versions[k],Saving); --togo; TotalSaving += Saving; for (int k2=DoAfter[k]; k2>=0; k2=DoAfter[k2]) { TIMER(PossibleRevert); if (versions[k2] != versions[k]) { DEBOUT(("Hash collision %d vs. %d\n", k,k2)); int kX; for (kX=k; kX!=k2; kX = DoAfter[kX]) if (versions[kX]==versions[k2]) break; int SpecialSaving; if (kX==k2) result[k2] = gobble(versions[k2],SpecialSaving); else { result[k2] = result[kX]; SpecialSaving = FinalLen - StringCost(result[k2].c_str); }                   TotalSaving += SpecialSaving; } else { DEBOUT10(("Reuse for #%d\n", k2)); result[k2] = result[k]; --togo; TotalSaving += Saving; }               TIMER(PossibleRevert); }           if (DoAfter[k]<0) { // singleton phase int Now = Deb.musecs; double haveRaw = TotalSaving/(double)baseline; ScoreQueue.push(pair<double,int>(haveRaw,Now)); if (ScoreQueue.size > 10) { pair<double,int> old = ScoreQueue.front; ScoreQueue.pop; double expectRaw = haveRaw + (haveRaw-old.first)/ScoreQueue.size; int AvgMusecs = (Now - old.second)/ScoreQueue.size; double haveTimefactor = TimeFactor(Now); double haveScore = haveRaw*haveTimefactor; // seems to improve, but is a hack. :)		   if (haveTimefactor < 0.9999 or Now>MILLION) 			GrowBackLimit = 8;

double expectTimefactor = TimeFactor(Now + AvgMusecs); double expectScore = expectRaw * expectTimefactor; DEBOUT10(("%d to go: %g * %g = %g (expect %g * %g = %g)\n", togo, haveRaw, haveTimefactor, haveScore, expectRaw, expectTimefactor, expectScore )); if (Now + 10*AvgMusecs > TimeLimit) { DEBOUT(("%d to go: %d mus vs %d mus might be too risky for %g\n", togo, AvgMusecs,TimeLimit-Now,expectRaw-haveRaw)); }                   if (Now + 3*AvgMusecs > TimeLimit) { DEBOUT(("%d to go: Need %gms per case!\n", togo, AvgMusecs*.001)); PANIC = true; } else if (expectScore < haveScore) { DEBOUT(("%d to go: Expect score decrease %g -> %g by %g%%!\n",togo,haveScore,expectScore,(haveScore-expectScore)/haveScore*100.0 )); PANIC = true; }               }            }        }        TIMER(Tasking);

DEBOUT(("Raw Score(%d off %d) = %g; Rate = %gkB/s; TimeFactor = %g => %g\n", TotalSaving,baseline,TotalSaving/(double)baseline, Rate,TimeFactor,TotalSaving/(double)baseline*TimeFactor));

TIMER(Total); Profiler::Print; return result; } };
 * 1) ifdef PROFILE
 * 1) endif

//#define FN "Philippine_Airlines.txt" //#define FN "Mork.txt"
 * 1) ifdef WIN32
 * 2) define FN "Jahwist.txt"

/*
 Helper for the myassert diagnostics: returns p[i] without reading past
 the terminating '\0' of p, and '?' if p is NULL.
*/
static char VerifyPeek(const char* p, int i) {
    if (!p) return '?';
    for (int k = 0; k < i; ++k)
        if (!p[k]) return 0;
    return p[i];
}

/*
 Check a condition inside Verify. On failure print the version number,
 the next two source-block and destination characters, the current code,
 the remaining block length and the message err, then trip assert.
 Relies on the locals what, sblock, c, dst and stogo of Verify.
 Wrapped in do/while(0) so it behaves like a single statement.
*/
#define myassert(cond,err) do { if (!(cond)) { \
    printf("\n#%d: '%c%c'-(%c)->'%c%c', %d to go : %s\n",what, \
           VerifyPeek(sblock,0),VerifyPeek(sblock,1),c, \
           VerifyPeek(dst,0),VerifyPeek(dst,1),stogo,err); \
    assert(cond); } } while (0)

/*
 Replay an edit program and check that it turns src into dst.

 what  number of the version, used in diagnostics only
 src   source string
 dst   destination string the program must reproduce
 prog  zero or more block specifications "first-last " (inclusive source
       offsets) followed by a string of codes:
         'M' copy one character of the current block (must match dst)
         'D' skip one character of the current block
         'I' take one character of dst without consuming source
       A new block specification is consumed whenever an 'M' or 'D' is
       met and the current block is exhausted.

 Violations are reported through myassert.
*/
void Verify(int what, const char* src, const char* dst, const char* prog) {
    const char* MID=prog;
    // Skip the block specifications; stop at the terminator as well so a
    // program without any codes is not scanned past its end.
    while (*MID && *MID <'D') ++MID;
    const char* sblock=NULL;
    int stogo=0;
    char c;
    while ((c=*MID++)) {
        if (c=='I') {
            myassert(*dst,"INS after completion");
            ++dst;
        } else {
            if (!stogo) {
                // Initialised so that a failed parse cannot leave garbage
                // behind when assert is compiled out.
                int first=0,last=0,count=0;
                int t=sscanf(prog,"%d-%d %n",&first,&last,&count);
                myassert(t==2,"Missing block");
                if (t!=2) return;   // nothing sensible to replay without a block
                prog += count;
                sblock = src + first;
                stogo = last-first+1;
            }
            assert(stogo>0);
            if (c=='M') {
                myassert(*dst == *sblock,"Mismatched MOVE");
                ++dst;
            } else {
                myassert(c=='D',"Bad MID code");
            }
            myassert(*sblock,"Overlong block");
            ++sblock;
            --stogo;
        }
    }
    c='?';
    myassert(!*dst,"Incomplete");
    myassert(!stogo,"M_Block debris");
    // prog must now be at the first code (or at the end of an empty program)
    myassert(!*prog || *prog>='D',"Extraneous block");
}

int main(int argc, char*argv[]) { vector ver;

FILE* in = fopen("C:\\Programme\\Microsoft Visual Studio\\MyProjects\\mm18\\Debug\\" FN,"rt"); assert(in); int nv=0; while (!feof(in)) { ++nv; string ac; char buf[INBUFS]; buf[0]=0; for { buf[INBUFS-2]=0; if (!fgets(buf,INBUFS,in)) break; if (buf[INBUFS-2]==0 || buf[INBUFS-2]=='\n') break; ac += buf; }       int n=strlen(buf); if (n) { if (buf[n-1]=='\n') buf[n-1]=0; ac += buf; }       if (ac.length) { //           if (nv==1 || nv==59) ver.push_back(ac); }   }    fclose(in); ver.push_back("abcdefghikjlmnopqrstuvwxyz"); ver.push_back("abcdefghijklmnopqrstuvawbxcydzawbxcydzefghijklmnopqrstuvwxyz"); BlockEditDistance foo; vector result = foo.transform(ver,16); int N=ver.size-1; assert(result.size == N); DEBOUTLONGSTRING(("result0 = <%s>\n", result[0].c_str )); for (int i=0; i<N; ++i) { Verify(i,ver[i].c_str, ver[N].c_str, result[i].c_str); }   return 0; }
 * 1) ifdef FN
 * 2) define INBUFS 2048
 * 1) else
 * 1) endif
 * 1) endif