Mercurial > libguess
annotate arabic_impl.c @ 3:70e2c306231e
- implemented dfa utility functions.
- added dfa.c.
- rewrote guess functions for ar, gr, hw and tr scripts with dfa utilities.
- guess functions for cjk scripts too.
| author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
|---|---|
| date | Thu, 12 Jun 2008 20:20:43 +0900 |
| parents | 754a4550c64e |
| children |
| rev | line source |
|---|---|
| 0 | 1 #include "libguess.h" |
|
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
2 #include "dfa.h" |
|
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
3 #include "guess_tab.c" |
| 0 | 4 |
|
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
5 /* precedence order */ |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
6 #define ORDER &utf8, &iso8859_6, &cp1256 |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
7 |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
8 /* encodings */ |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
9 static guess_dfa cp1256 = DFA_INIT(guess_cp1256_st, guess_cp1256_ar, "CP1256"); |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
10 static guess_dfa iso8859_6 = DFA_INIT(guess_iso8859_6_st, guess_iso8859_6_ar, "ISO-8859-6"); |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
11 static guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar, "UTF-8"); |
|
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
12 |
|
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
13 const char *guess_ar(const char *buf, int buflen) |
| 0 | 14 { |
| 15 int i; | |
|
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
16 const char *rv = NULL; |
|
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
17 guess_dfa *top = NULL; |
|
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
18 guess_dfa *order[] = { ORDER, NULL }; |
|
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
19 |
|
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
20 for (i = 0; i < buflen; i++) { |
|
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
21 int c = (unsigned char) buf[i]; |
|
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
22 |
|
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
23 /* special treatment of BOM */ |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
24 if (i == 0 && c == 0xff) { |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
25 if (i < buflen - 1) { |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
26 c = (unsigned char) buf[i + 1]; |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
27 if (c == 0xfe) |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
28 return UCS_2LE; |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
29 } |
|
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
30 } |
|
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
31 if (i == 0 && c == 0xfe) { |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
32 if (i < buflen - 1) { |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
33 c = (unsigned char) buf[i + 1]; |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
34 if (c == 0xff) |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
35 return UCS_2BE; |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
36 } |
|
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
37 } |
|
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
38 |
|
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
39 rv = dfa_process(order, c); |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
40 if(rv) |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
41 return rv; |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
42 |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
43 if (dfa_none(order)) { |
|
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
44 /* we ran out the possibilities */ |
|
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
45 return NULL; |
|
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
46 } |
| 0 | 47 } |
| 48 | |
|
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
49 top = dfa_top(order); |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
50 if (top) |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
51 return top->name; |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
52 else |
|
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
53 return NULL; |
| 0 | 54 } |
