/*
	openisis - an open implementation of the CDS/ISIS database
	Version 0.8.x (patchlevel see file Version)
	Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
	Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

	see README for more information
EOH */
#ifndef LCS_H
#define LCS_H

/*
	$Id: lcs.h,v 1.3 2003/04/08 00:20:52 kripke Exp $
	charset support
*/

/* Kind of character encoding. */
typedef enum {
	LCS_SINGLE, /* some ASCII-compliant single byte charset */
	LCS_UTF8,   /* unicode in 8bit transfer encoding */
	LCS_UCS2    /* the unicode base multilingual plane (BMP, first 64K chars)
		in 2byte encoding, native (platform) byte order */
} lcs_type;

/* Known single byte charsets. */
typedef enum {
	LCS_LATIN1, /* the ISO-8859-1 (Latin 1) charset */
	LCS_CP850,  /* MS-DOS Latin 1, known as the "OEM" charset */
	LCS__SBCS   /* # of single byte charsets */
} lcs_single;


/* Identifiers of the per-charset tables. */
typedef enum {
	LCS_CTYPE, /* character class flags, see the LCS_R .. LCS_N enum below */
	LCS_UNICO, /* 128 unicodes */
	LCS_UCASE, /* each character's uppercase code */
	LCS_INDEX, /* single byte recoding for index ("strxfrm") */
	LCS_XEDNI, /* inverse of LCS_INDEX */
	LCS_RECOD, /* single byte recoding to extern (typically Latin1) */
	LCS_DOCER, /* inverse of LCS_RECOD */
	LCS__TABS  /* # of table ids */
} lcs_tabid;


/*
	some conversion table on a single byte charset.
	c may hold an actab (!0 for any character deemed "alpha"),
	uctab ("uppercase" replacement code, typically removing diacriticals),
	code-to-code conversion or other.
	u may hold unicode values for the upper half of an
	ASCII compatible single byte charset.
*/
typedef union {
	unsigned char  c[256];
	unsigned short u[128];
} LcsTab;

/*
	character class values.
	The numeric values are chosen so that the LCS_IS* macros below
	can classify a value with a single mask or compare.
*/
enum {
	/* controls (0..31,127) */
	LCS_R = 0x00, /* record separator (FF,GS), */
	LCS_F = 0x10, /* field separator (NUL,CR,LF,RS) */
	LCS_C = 0x28, /* other control */
	/* other separators */
	LCS_S = 0x48, /* space(blank,nbsp), */
	LCS_T = 0x68, /* other token separator (,:;=), */
	/* other punctuation assumed "word" characters */
	LCS_P = 0x80,
	LCS_Y = 0xb0, /* symbol */
	/* identifiers */
	LCS_I = 0xc0, /* C-ident (_). */
	LCS_D = 0xd0, /* digits ('0'..'9') */
	LCS_A = 0xe0, /* ASCII alpha */
	LCS_L = 0xe4, /* ... among those formatting literals a,c,i,x */
	LCS_N = 0xf0  /* other alpha ("national"/non-ASCII) */
};

/*
	class tests on a character class value t.
	Each macro evaluates t exactly once.
	The mask based tests yield zero or some non-zero value,
	not necessarily 1.
*/
/* neither of the two top bits set: LCS_R, LCS_F, LCS_C */
#define LCS_ISCONTR( t ) (!(0xc0 & (t)))
/* any class up to and including LCS_S, so controls count as space, too */
#define LCS_ISSPACE( t ) (LCS_S >= (t))
/* complement of LCS_ISCONTR; for the classes above same as LCS_S <= (t) */
#define LCS_ISPRINT( t ) (0xc0 & (t))
/* both top bits set: LCS_I, LCS_D, LCS_A, LCS_L, LCS_N */
#define LCS_ISIDENT( t ) (0xc0 == (0xc0 & (t)))
/* top three bits set: LCS_A, LCS_L, LCS_N */
#define LCS_ISALPHA( t ) (0xe0 == (0xe0 & (t)))

/* for record parsing */
/* top bit clear: controls and other separators (LCS_R .. LCS_T) */
#define LCS_ISSEP( t ) (!(0x80 & (t)))
/* top bit set: punctuation, symbols and identifiers (LCS_P .. LCS_N) */
#define LCS_ISWORD( t ) (0x80 & (t))
#define LCS_ISFR( t ) (!(0xe0 & (t))) /* field or record separator */
#define LCS_ISCST( t ) (0x08 & (t)) /* other separator */


enum {
	LCS_MKUNI = -1 /* as "bits" value, have mktab create unicode table */
};

/*
	fill a conversion table.
	Passing LCS_MKUNI as bits requests a unicode table.
	NOTE(review): the original description of this function is not part
	of this listing; the meaning of numbers, len, other bits values and
	of the return value has to be confirmed against the implementation.
*/
extern int lcs_mktab ( LcsTab *dst, char *numbers, int len, int bits );

/*
	NOTE(review): the original description of this function is not part
	of this listing; presumably builds a single byte recoding table in dst
	from the unicode tables from and to -- confirm against the
	implementation, including the meaning of the return value.
*/
extern int lcs_mkrecod ( unsigned char *dst, unsigned short *from,
	unsigned short *to );

/* tables for Latin 1, defined elsewhere; judging by the names the
	uppercase (LCS_UCASE) and class (LCS_CTYPE) tables -- TODO confirm */
extern unsigned char lcs_latin1_uc[256];
extern unsigned char lcs_latin1_ct[256];

#endif /* LCS_H */