負数は未対応。
import std.stdio; uint[] sort(uint[] arr){ scope uint[] buf = new uint[arr.length]; foreach(k;0..uint.sizeof){ size_t[0x101] count; foreach(a;arr) count[((a>>(8*k))&0xff) + 1]++;//FIXME: 0xffの時は数えなくていい。 foreach(i;0..count.length-1) count[i+1] += count[i]; foreach(a;arr) buf[count[(a>>(8*k))&0xff]++] = a; auto t = arr; arr = buf; buf = t; } return arr; } string[] sort(string[] arr){ scope string[] buf = new string[arr.length]; foreach(k;0..size_t.sizeof){ size_t[0x101] count; foreach(a;arr) count[((a.length>>(8*k))&0xff) + 1]++;//FIXME: 0xffの時は数えなくていい。 foreach(i;0..count.length-1) count[i+1] += count[i]; foreach(a;arr) buf[count[(a.length>>(8*k))&0xff]++] = a; auto t = arr; arr = buf; buf = t; } foreach(k;0..arr[$-1].length){ size_t[0x101] count; foreach(a;arr) count[(a.length>k?a[k]:0) + 1]++;//FIXME: 0xffの時は数えなくていい。 foreach(i;0..count.length-1) count[i+1] += count[i]; foreach(a;arr) buf[count[a.length>k?a[k]:0]++] = a; auto t = arr; arr = buf; buf = t; } return arr; } void main(){ writefln(sort([1,4,2,5,3,3,5,2,4,1,0x103,0x201,0x302])); writefln(sort(["a","c","ab","b","cd","abc"])); }
256 * 4のループが気になる。4byteづつなら16*8のループですむけど、他の所が増えるからなぁ。
要素が少ないときはコムソートの方が早い気ガス。
上はバケットソート版。下は逆写像ソート版。速度はあまり変わらない。ちなみに組み込みのソートより遅い。
uint[] sort(uint[] arr){ scope uint[] buf = new uint[arr.length]; scope uint[] next = new uint[arr.length]; foreach(k;0..uint.sizeof){ int[0x100] index = -1;//番兵 for (int i = arr.length-1; i >= 0; i--) { int x = ((arr[i]>>(8*k))&0xff); next[i] = index[x]; index[x] = i; } int j; foreach (x;0..0x100) for(int i = index[x];i >= 0;i = next[i]) buf[j++] = arr[i]; auto t = arr; arr = buf; buf = t; } return arr; }
高速化してみた。要素数が多ければ組み込みのよりも早いけど、正直必要性が(ry。
4ビットごと版。
uint[] sort(uint[] arr){ scope uint[] buf = new uint[arr.length]; size_t[0x88] count; foreach(ref a;arr){ count[( a &0xf) + 0x1 ]++; count[((a>>4 )&0xf) + 0x12]++; count[((a>>8 )&0xf) + 0x23]++; count[((a>>12)&0xf) + 0x34]++; count[((a>>16)&0xf) + 0x45]++; count[((a>>20)&0xf) + 0x56]++; count[((a>>24)&0xf) + 0x67]++; count[((a>>28)&0xf) + 0x78]++; } for(int i;i<0x10;i++){ count[i+0x1 ] += count[i ]; count[i+0x12] += count[i+0x11]; count[i+0x23] += count[i+0x22]; count[i+0x34] += count[i+0x33]; count[i+0x45] += count[i+0x44]; count[i+0x56] += count[i+0x55]; count[i+0x67] += count[i+0x66]; count[i+0x78] += count[i+0x77]; } foreach(ref a;arr) buf[count[ a &0xf ]++] = a; foreach(ref a;buf) arr[count[((a>>4) &0xf) + 0x11]++] = a; foreach(ref a;arr) buf[count[((a>>8) &0xf) + 0x22]++] = a; foreach(ref a;buf) arr[count[((a>>12)&0xf) + 0x33]++] = a; foreach(ref a;arr) buf[count[((a>>16)&0xf) + 0x44]++] = a; foreach(ref a;buf) arr[count[((a>>20)&0xf) + 0x55]++] = a; foreach(ref a;arr) buf[count[((a>>24)&0xf) + 0x66]++] = a; foreach(ref a;buf) arr[count[((a>>28)&0xf) + 0x77]++] = a; return arr; }
8ビットごと版。
uint[] sort(uint[] arr){ scope uint[] buf = new uint[arr.length]; size_t[0x808] count; foreach(ref a;arr){ count[( a &0xff) + 0x1 ]++; count[((a>>8 )&0xff) + 0x102]++; count[((a>>16)&0xff) + 0x203]++; count[((a>>24)&0xff) + 0x304]++; } foreach(i;0..0x100){ count[i+0x1 ] += count[i ]; count[i+0x102] += count[i+0x101]; count[i+0x203] += count[i+0x202]; count[i+0x304] += count[i+0x303]; } foreach(ref a;arr) buf[count[ a &0xff ]++] = a; foreach(ref a;buf) arr[count[((a>>8) &0xff) + 0x101]++] = a; foreach(ref a;arr) buf[count[((a>>16)&0xff) + 0x202]++] = a; foreach(ref a;buf) arr[count[((a>>24)&0xff) + 0x303]++] = a; return arr; }