Comment by Luyt
13 years ago
Yeah, like very quick lowercasing of blocks of ASCII text, 8 characters at a time:
// Lowercases the ASCII letters 'A'..'Z' of a byte buffer in place, processing
// eight bytes per iteration with MMX and any remaining tail one byte at a time.
//
//   buffer  text to convert; a NULL pointer is returned untouched.
//   len     number of bytes to convert, or -1 (the default) to use
//           strlen(buffer). Zero or any other negative value converts nothing.
//
// Returns buffer.
//
// Only bytes 0x41..0x5A are changed. The MMX compares are signed, so bytes
// >= 0x80 fail the "> 'A'-1" test and pass through unmodified, exactly as the
// unsigned compares of the tail loop leave them alone.
//
// Requires a 32-bit x86 target with MMX and a compiler that accepts this
// inline-assembly syntax.
char *strlwrmmx(char *buffer,int len=-1)
{
    if (!buffer) return buffer;
    if (len==-1) len=(int)strlen(buffer);
    if (len<=0) return buffer; // nothing to do; also keeps a negative count out of the loops below

    unsigned __int64 a=0x4040404040404040; // 0x40='A'-1
    unsigned __int64 z=0x5B5B5B5B5B5B5B5B; // 0x5B='Z'+1
    unsigned __int64 c=0x2020202020202020; // conversion offset for uppercase->lowercase
    asm {
        mov edi,buffer       // edi = current position in buffer
        mov ecx,len          // ecx = bytes remaining (> 0 on entry)
        cmp ecx,8
        jb rest              // fewer than eight bytes: MMX is never touched
        movq mm4,c           // eight copies of the conversion offset (loop-invariant)
        movq mm2,a           // eight copies of 'A'-1 (loop-invariant)
    convblock:
        movq mm3,z           // eight copies of 'Z'+1; reloaded because pcmpgtb below overwrites mm3
        movq mm0,[edi]       // next eight characters of the string
        movq mm1,mm0         // second copy for the lower-bound compare
        pcmpgtb mm1,mm2      // mm1 = 0xFF in every byte where char > 'A'-1 (signed)
        pcmpgtb mm3,mm0      // mm3 = 0xFF in every byte where char < 'Z'+1 (signed)
        pand mm1,mm3         // mm1 = 0xFF where 'A' <= char <= 'Z'
        pand mm1,mm4         // mm1 = 0x20 in each spot that holds an uppercase char
        paddb mm0,mm1        // convert uppercase chars to lowercase by adding 0x20
        movq [edi],mm0       // store back in buffer
        add edi,8
        sub ecx,8
        cmp ecx,8
        jae convblock
        emms                 // release the MMX state so later x87 floating-point code works
    rest:                    // convert any leftover characters one by one
        test ecx,ecx
        jz done
    convchar:
        mov al,[edi]
        cmp al,'A'
        jb skip
        cmp al,'Z'
        ja skip
        add al,0x20
        mov [edi],al
    skip:
        inc edi
        dec ecx
        jnz convchar
    done:
    }
    return buffer;
}
Out of curiosity, what kind of program were you writing that was bottlenecked by a UTF-8 lowercase conversion?
A text indexer. It did case-insensitive searching, so I lowercased everything (both the source text and the search terms), which allowed me to use a quicker substring search (i.e., no need to compare characters case-insensitively).
The indexer had to chew huge amounts of text, so the quicker the lowercasing, the better.