April 18, 2024, 06:58:48 PM

News:

Own IWBasic 2.x ? -----> Get your free upgrade to 3.x now.........


Searching for string

Started by sapero, September 05, 2006, 07:51:53 AM

Previous topic - Next topic

0 Members and 1 Guest are viewing this topic.

sapero

September 05, 2006, 07:51:53 AM Last Edit: September 05, 2006, 07:59:16 AM by sapero
Here's a snippet for ultra fast finding a string.
If you have 65536 strings, you need to compare only (max) 16 times to find the string.
I've posted this tipp on IBStandard forum ;D
All what you need is sorted array of string pointers, number of strings and string to find:
// sort approximate time is calculated for given constants and my cpu speed.
//console app

// fast find string exaple
// requires sorted table with string pointers (use same compare function for sorting and searching)
// how it works:
// - set search range to maximum (0 to count-1)
// - set compare position 'in between' of the range ( min + 0.5(max-min)ÂÃ,  orÂÃ,  0.5(min+max)ÂÃ,  )
// - compare. if equal - found
//ÂÃ,  ÂÃ, if greater - move range min to position+1
//ÂÃ,  ÂÃ, if bellowÂÃ,  - move range max to position-1
//ÂÃ,  ÂÃ, if min <= max go to 'set compare position'
//
// maximum iterations: log2(number_of_strings) - also max 16 for 65536 strings

#define NUM_STRINGS 65536*16ÂÃ,  ÂÃ,  // number of strings (max 2^32 :P)
#define STRING_LEN_MIN 2ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ, // minimum characters in string (max 32767)
#define STRING_LEN_MAX 32ÂÃ,  ÂÃ,  ÂÃ,  // maximum characters in string (max 32767)
string* g_table[NUM_STRINGS];ÂÃ,  // array of string pointers to search
intÂÃ,  ÂÃ,  ÂÃ, g_nStrings;ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  // number of successfully allocted strings

import void cdecl qsort(void* array, int elements, int elementsize, void* cdecl_compare_fn);
import byte getch alias _getch();
import intÂÃ,  GetTickCount();
import intÂÃ,  lstrcmp alias lstrcmpA(string *a,string *b); // 0(a=b)ÂÃ,  ÂÃ, +(a>b)ÂÃ,  ÂÃ, -(a<b)



global sub main()
{
// fill the string pointer g_table
print("creating ",NUM_STRINGS," random strings, with ",STRING_LEN_MIN,"-",STRING_LEN_MAX," characters... ",);
int time = GetTickCount();
CreateStrings(STRING_LEN_MIN, STRING_LEN_MAX);
print("done ", (GetTickCount() - time)/1000.0, "s");

// any string failed?
if (g_nStrings != NUM_STRINGS)
print("\nOOPS! Out of memory! Created only ", g_nStrings, " strings\n");

// any string allocated?
if (g_nStrings == 0) goto __exit001;

// sort it
print("sorting (approx. ",g_nStrings/71920,"s)... ",);
time = GetTickCount();
// we sort pointers, so element size = sizeof(pointer)
qsort( &g_table, g_nStrings, sizeof(pointer), &SortModesCallback );
print("done ", (GetTickCount() - time)/1000.0, "s");

// pick a random string from g_table to search for
int randindex = (rand(32768) << 16) | rand(32768);
while (randindex > (g_nStrings-1)) {randindex /= 2;}

// search
int Iterations;
string *pStrToFind = g_table[randindex];
print("\n* string to search for: '", *pStrToFind, "'");
print("* position: ",randindex);
print("searching (case sensitible)... ",);
time = GetTickCount();
int pos = Find(pStrToFind, &Iterations);
print("done ", (GetTickCount() - time)/1000.0, "s");

// display stats
if (pos != -1) {
string *lpstr = g_table[pos];
print("\nfound stringÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  '", *lpstr, "'");
print("at position ", pos);
print("iterations: ",Iterations);}
else {
print("\nnot found!");}

// cleanup
print("\ndeleting strings... ",);
time = GetTickCount();
FreeStrings();
print("done ", (GetTickCount() - time)/1000.0, "s");

__exit001:
print("press any key to quit");
getch();
}




sub CreateStrings(int minlen, int maxlen)
{
// loop for all strings
g_nStrings = 0;
for (int pos = 0; pos<NUM_STRINGS; pos++)
{
// random string lenght
int strsize = rand(minlen, maxlen);

// allocate new string
byte *pstr = new(byte, strsize+1);

if (!pstr)
break;

g_nStrings++;

// save it in g_table
g_table[pos] = pstr;

// and fill it with random characters
for (int chr=0; chr<strsize; chr++)
{
// random character type: big/small letter or a digit
int type = rand(0,2);
int char;
ifÂÃ,  ÂÃ,  ÂÃ,  (type == 0) char = rand('0','9')
else if (type == 1) char = rand('a','z')
elseÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  char = rand('A','Z');
// write random character
*(byte)pstr[chr] = char;
}
// terminate the string with null
*(byte)pstr[strsize] = 0;
}
}



sub Find(string *pStrToFind,int *ppIterations),int
{
int min = 0;
int max = g_nStrings-1;
int cmp = -1;
*ppIterations = 0;

while (min <= max)
{
int pos = min + ((max - min) / 2);
//print(" - range: ",min,"-",max, " pos:",pos);
cmp = lstrcmp(pStrToFind, g_table[pos]);

if (cmp > 0) {min = pos+1;}
else if (cmp < 0) {max = pos-1;}
else break;
*ppIterations++;
}
if (cmp != 0) pos = -1;
return pos;
}


sub FreeStrings()
{
for (int pos = 0; pos<g_nStrings; pos++)
{
delete g_table[pos];
}
}




// SortModesCallback function should be cdecl
declare cdecl SortModesCallback(void* arg1, void* arg2 ),int;

// arg1 and arg1 - addresses of copared elements
sub SortModesCallback(void* arg1, void* arg2 ),int
{
return lstrcmp(*(pointer)arg1,*(pointer)arg2);
}

kryton9

Thanks Sapero, another fine example!! Will definitly be in my study list!!

sapero

After some tests: the lstrcmp api is very slow, it sorts much better with crt function strcmp, but the ultra speed you get with StrCmpC from shlwapi.dll :)

Compare sort time for 65536*4 strings with fixed size 256 characters:
  • lstrcmpÂÃ,  15ÂÃ,  ÂÃ, s
  • strcmpÂÃ,  ÂÃ,  4ÂÃ,  ÂÃ, s
  • StrCmpCÂÃ,  ÂÃ, 0.4 s

defines:
// string compare api
import intÂÃ,  lstrcmp alias lstrcmpA(string *a,string *b); // 0(a=b)ÂÃ,  ÂÃ, +(a>b)ÂÃ,  ÂÃ, -(a<b)
import int cdecl strcmp(string *s1,string *s2);
// this one is the fastest
#use "shlwapi.lib"
import int StrCmpC alias StrCmpCA(string *s1,string *s2);


To apply new compare function you need to replace the original lstrcmp in Find() and in SortModesCallback().
The SortModesCallback could be QsortCallback...
BTW sort time is not important here, only the search time counts here, and it is ~null :)

Ionic Wind Support Team

lstrcmpA is slow on NT systems if your using ANSI strings as it has to convert each string to unicode first and then calls the wide character version of the api.  If you use unicode strings you will find it quicker.

strcmp from C only works with ANSI
Ionic Wind Support Team