April 30, 2024, 10:54:16 PM

News:

Own IWBasic 2.x ? -----> Get your free upgrade to 3.x now.........


Unicode now supported

Started by sapero, February 23, 2006, 02:10:39 AM

Previous topic - Next topic

0 Members and 1 Guest are viewing this topic.

sapero

February 23, 2006, 02:10:39 AM Last Edit: February 23, 2006, 02:35:38 AM by sapero
Hello all,
Browsing the source code for nasm from \redist folder I have found a way to let the assembler support unicode.
In file preproc.c changed:int __stdcall MultiByteToWideChar(int cp,int flag,char *ansi,int len,char *uni,int len2);
/*
*ÂÃ,  this function creates a new Token and passes a pointer to it
*ÂÃ,  back to the caller.ÂÃ,  It sets the type and text elements, and
*ÂÃ,  also the mac and next elements to NULL.
*/
static Token *
new_Token(Token * next, int type, char *text, int txtlen)
{
ÂÃ,  ÂÃ,  Token *t;
ÂÃ,  ÂÃ,  int i;
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  char *cc, start;
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  unsigned char uni[4];ÂÃ,  // sapero

ÂÃ,  ÂÃ,  if (freeTokens == NULL)
ÂÃ,  ÂÃ,  {
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  freeTokens = (Token *)new_Block(TOKEN_BLOCKSIZE * sizeof(Token));
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  for (i = 0; i < TOKEN_BLOCKSIZE - 1; i++)
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  freeTokens[i].next = &freeTokens[i + 1];
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  freeTokens[i].next = NULL;
ÂÃ,  ÂÃ,  }
ÂÃ,  ÂÃ,  t = freeTokens;
ÂÃ,  ÂÃ,  freeTokens = t->next;
ÂÃ,  ÂÃ,  t->next = next;
ÂÃ,  ÂÃ,  t->mac = NULL;
ÂÃ,  ÂÃ,  t->type = type;
ÂÃ,  ÂÃ,  if (type == TOK_WHITESPACE || text == NULL)
ÂÃ,  ÂÃ,  {
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  t->text = NULL;
ÂÃ,  ÂÃ,  }
ÂÃ,  ÂÃ,  else
ÂÃ,  ÂÃ,  {
ÂÃ,  ÂÃ,  if (txtlen == 0)
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  txtlen = strlen(text);

ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  /////////////////////////////////////////////
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  // nasm unicode support start (by sapero)
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  /////////////////////////////////////////////
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  //
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  // string "\Labcd" is replaced with
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  // "a",0,"b",0,"c",0,"d",0,0,0

ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  if (type==TOK_STRING && 0==strncmp(text, "\"\\L", 3) ) {

ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  t->text = nasm_malloc(((strlen(text)-2) * 6)+9);
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  cc = t->text;
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  start = *text;
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  text += 3; // skip "\L

ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  while (*text != start) {
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  MultiByteToWideChar(0,0,text,1,uni,1); // convert character
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  sprintf(cc, "\"%c\",%d,",uni[0],uni[1]); // use codepage
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  //sprintf(cc, "\"%c\",0,", *text); // no codepage
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  cc += strlen(cc); text++; // 6-8 characters
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  }
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  sprintf(cc, "0,0");
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  }
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  else {
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  // the original handler...
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  t->text = nasm_malloc(1 + txtlen);
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  strncpy(t->text, text, txtlen);
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  t->text[txtlen] = '\0';
ÂÃ,  ÂÃ,  ÂÃ,  ÂÃ,  }
ÂÃ,  ÂÃ,  }
ÂÃ,  ÂÃ,  return t;
}


usage is simple: "\Lx"ÂÃ,  where x is your string. Unicode string is 'assembled' into object file, so .asm output is not changed.
Works also in pure asm coding :)
invoke MessageBoxW, 0, "\Lunicode test", "\L", 0x40

OEM characters are supported by MultiByteToWideChar api
extern int MessageBoxW(int hwnd,string s1,opt string *s2,opt int icon=0x40);
global sub main()
{
MessageBoxW(0,"\Lunicode message box","\L");
return MessageBoxW(0,"\LÂÃ,¹ÃÆ'Ã,¦Ã‚Ã,¿ÃÆ'Ã,±Ã‚Ã,³ÃÆ'Ã,³ÃÆ'Ã,ªÃÆ'Ã,¤ÃÆ'Ã,¶ÃÆ'Ã,¼","\L");
}


and to see how the string is modified:#asm %error '"\Lunicode"'
#endasm

sapero

small change:            t->text = nasm_malloc(((strlen(text)-2) * 8)+4); // "aaa,bbb," * size + "0,0" + null
            cc = t->text;
            start = *text;
            text += 3; // skip "\L

            while (*text != start) {
                MultiByteToWideChar(0,0,text,1,uni,1); // convert character
                sprintf(cc, "%d,%d,",uni[0],uni[1]); // use codepage
                cc += strlen(cc); text++;
            }
            sprintf(cc, "0,0");
        }

both unicode characters are converted into digits: xxx,yyy,

Parker

Cool, now we have to get Paul to incorporate that.