// metaphone.cpp
  1. #include "platform.h"
  2. #include <time.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include "metaphone.h"
  6. ////////////////////////////////////////////////////////////////////////////////
  7. // Double Metaphone (c) 1998, 1999 by Lawrence Philips
  8. //
  9. // Slightly modified by Kevin Atkinson to fix several bugs and
  10. // to allow it to give back more than 4 characters.
  11. //
  12. // 13-Dec-00 mtw Modified to return a number (e.g. 77th returns 77)
  13. //
  14. // Placed in the public domain by Lawrence Philips
  15. //
  16. ////////////////////////////////////////////////////////////////////////////////
  17. #include "metaphone.h"
  18. #include <ctype.h>
  19. #define AND &&
  20. #define OR ||
  21. namespace nsDmetaphone {
  22. ////////////////////////////////////////////////////////////////////////////////
  23. //
  24. ////////////////////////////////////////////////////////////////////////////////
  25. MString::MString()
  26. {
  27. }
  28. ////////////////////////////////////////////////////////////////////////////////
  29. //
  30. ////////////////////////////////////////////////////////////////////////////////
  31. MString::MString(const char* in) : cString(in)
  32. {
  33. }
  34. ////////////////////////////////////////////////////////////////////////////////
  35. //
  36. ////////////////////////////////////////////////////////////////////////////////
  37. MString::MString(const cString& in) : cString(in)
  38. {
  39. }
  40. ////////////////////////////////////////////////////////////////////////////////
  41. //
  42. ////////////////////////////////////////////////////////////////////////////////
  43. bool MString::SlavoGermanic()
  44. {
  45. return (Find('W') OR Find('K') OR Find("CZ") OR Find("WITZ"));
  46. }
  47. ////////////////////////////////////////////////////////////////////////////////
  48. //
  49. ////////////////////////////////////////////////////////////////////////////////
  50. inline void MString::MetaphAdd(const char* main)
  51. {
  52. primary.Cat(main);
  53. secondary.Cat(main);
  54. }
  55. ////////////////////////////////////////////////////////////////////////////////
  56. //
  57. ////////////////////////////////////////////////////////////////////////////////
  58. inline void MString::MetaphAdd(const char main)
  59. {
  60. primary += main;
  61. secondary += main;
  62. }
  63. ////////////////////////////////////////////////////////////////////////////////
  64. //
  65. ////////////////////////////////////////////////////////////////////////////////
  66. inline void MString::MetaphAdd(const char* main, const char* alt)
  67. {
  68. if(*main)
  69. primary.Cat(main);
  70. if(*alt)
  71. {
  72. alternate = true;
  73. if(alt[0] != ' ')
  74. secondary.Cat(alt);
  75. }else
  76. if(*main AND (main[0] != ' '))
  77. secondary.Cat(main);
  78. }
  79. ////////////////////////////////////////////////////////////////////////////////
  80. //
  81. ////////////////////////////////////////////////////////////////////////////////
  82. bool MString::IsVowel(int at)
  83. {
  84. if((at < 0) OR (at >= length))
  85. return false;
  86. char it = GetAt(at);
  87. if((it == 'A') OR (it == 'E') OR (it == 'I') OR (it == 'O') OR (it == 'U') OR (it == 'Y') )
  88. return true;
  89. return false;
  90. }
  91. ////////////////////////////////////////////////////////////////////////////////
  92. //
  93. ////////////////////////////////////////////////////////////////////////////////
  94. bool MString::StringAt(int start, int len, ... )
  95. {
  96. if (start < 0) return false;
  97. char target[64];
  98. char* test;
  99. if (Len - start < len)
  100. {
  101. return false;
  102. }
  103. memcpy( target, Ptr + start, len );
  104. target[len] = 0;
  105. va_list sstrings;
  106. va_start(sstrings, len);
  107. do
  108. {
  109. test = va_arg(sstrings, char*);
  110. if(*test AND (strcmp(target, test) == 0))
  111. {
  112. va_end(sstrings);
  113. return true;
  114. }
  115. }while(strcmp(test, ""));
  116. va_end(sstrings);
  117. return false;
  118. }
  119. ////////////////////////////////////////////////////////////////////////////////
  120. // main deal
  121. ////////////////////////////////////////////////////////////////////////////////
  122. void MString::DoubleMetaphone(cString &metaph, cString &metaph2)
  123. {
  124. int current = 0;
  125. length = Len;
  126. if(length < 1)
  127. return;
  128. last = length - 1;//zero based index
  129. alternate = false;
  130. primary = "";
  131. secondary = "";
  132. Upper();
  133. //pad the original string so that we can index beyond the edge of the world
  134. Cat(" ");
  135. //skip these when at start of word
  136. if(StringAt(0, 2, "GN", "KN", "PN", "WR", "PS", ""))
  137. current += 1;
  138. //Initial 'X' is pronounced 'Z' e.g. 'Xavier'
  139. if(GetAt(0) == 'X')
  140. {
  141. MetaphAdd('S'); //'Z' maps to 'S'
  142. current += 1;
  143. }
  144. if (isdigit(GetAt(0)))
  145. {
  146. while (isdigit(GetAt(current)) && current < length)
  147. {
  148. MetaphAdd(GetAt(current));
  149. current++;
  150. }
  151. }
  152. else while(true OR (primary.Len < 4) OR (secondary.Len < 4))
  153. ///////////main loop//////////////////////////
  154. {
  155. if(current >= length)
  156. break;
  157. switch(GetAt(current))
  158. {
  159. case 'A':
  160. case 'E':
  161. case 'I':
  162. case 'O':
  163. case 'U':
  164. case 'Y':
  165. if(current == 0)
  166. //all init vowels now map to 'A'
  167. MetaphAdd('A');
  168. current +=1;
  169. break;
  170. case 'B':
  171. //"-mb", e.g", "dumb", already skipped over...
  172. MetaphAdd('P');
  173. if(GetAt(current + 1) == 'B')
  174. current +=2;
  175. else
  176. current +=1;
  177. break;
  178. case '\307': // ascii 0xc7 = C with cedilla
  179. MetaphAdd('S');
  180. current += 1;
  181. break;
  182. case 'C':
  183. //various germanic
  184. if((current > 1)
  185. AND !IsVowel(current - 2)
  186. AND StringAt((current - 1), 3, "ACH", "")
  187. AND ((GetAt(current + 2) != 'I') AND ((GetAt(current + 2) != 'E')
  188. OR StringAt((current - 2), 6, "BACHER", "MACHER", "")) ))
  189. {
  190. MetaphAdd('K');
  191. current +=2;
  192. break;
  193. }
  194. //special case 'caesar'
  195. if((current == 0) AND StringAt(current, 6, "CAESAR", ""))
  196. {
  197. MetaphAdd('S');
  198. current +=2;
  199. break;
  200. }
  201. //italian 'chianti'
  202. if(StringAt(current, 4, "CHIA", ""))
  203. {
  204. MetaphAdd('K');
  205. current +=2;
  206. break;
  207. }
  208. if(StringAt(current, 2, "CH", ""))
  209. {
  210. //find 'michael'
  211. if((current > 0) AND StringAt(current, 4, "CHAE", ""))
  212. {
  213. MetaphAdd("K", "X");
  214. current +=2;
  215. break;
  216. }
  217. //greek roots e.g. 'chemistry', 'chorus'
  218. if((current == 0)
  219. AND (StringAt((current + 1), 5, "HARAC", "HARIS", "")
  220. OR StringAt((current + 1), 3, "HOR", "HYM", "HIA", "HEM", ""))
  221. AND !StringAt(0, 5, "CHORE", ""))
  222. {
  223. MetaphAdd('K');
  224. current +=2;
  225. break;
  226. }
  227. //germanic, greek, or otherwise 'ch' for 'kh' sound
  228. if((StringAt(0, 4, "VAN ", "VON ", "") OR StringAt(0, 3, "SCH", ""))
  229. // 'architect but not 'arch', 'orchestra', 'orchid'
  230. OR StringAt((current - 2), 6, "ORCHES", "ARCHIT", "ORCHID", "")
  231. OR StringAt((current + 2), 1, "T", "S", "")
  232. OR ((StringAt((current - 1), 1, "A", "O", "U", "E", "") OR (current == 0))
  233. //e.g., 'wachtler', 'wechsler', but not 'tichner'
  234. AND StringAt((current + 2), 1, "L", "R", "N", "M", "B", "H", "F", "V", "W", " ", "")))
  235. {
  236. MetaphAdd('K');
  237. }else{
  238. if(current > 0)
  239. {
  240. if(StringAt(0, 2, "MC", ""))
  241. //e.g., "McHugh"
  242. MetaphAdd('K');
  243. else
  244. MetaphAdd("X", "K");
  245. }else
  246. MetaphAdd('X');
  247. }
  248. current +=2;
  249. break;
  250. }
  251. //e.g, 'czerny'
  252. if(StringAt(current, 2, "CZ", "") AND !StringAt((current - 2), 4, "WICZ", ""))
  253. {
  254. MetaphAdd("S", "X");
  255. current += 2;
  256. break;
  257. }
  258. //e.g., 'focaccia'
  259. if(StringAt((current + 1), 3, "CIA", ""))
  260. {
  261. MetaphAdd('X');
  262. current += 3;
  263. break;
  264. }
  265. //double 'C', but not if e.g. 'McClellan'
  266. if(StringAt(current, 2, "CC", "") AND !((current == 1) AND (GetAt(0) == 'M')))
  267. {
  268. //'bellocchio' but not 'bacchus'
  269. if(StringAt((current + 2), 1, "I", "E", "H", "") AND !StringAt((current + 2), 2, "HU", ""))
  270. {
  271. //'accident', 'accede' 'succeed'
  272. if(((current == 1) AND (GetAt(current - 1) == 'A'))
  273. OR StringAt((current - 1), 5, "UCCEE", "UCCES", ""))
  274. MetaphAdd("KS");
  275. //'bacci', 'bertucci', other italian
  276. else
  277. MetaphAdd('X');
  278. current += 3;
  279. break;
  280. }else{//Pierce's rule
  281. MetaphAdd('K');
  282. current += 2;
  283. break;
  284. }
  285. }
  286. if(StringAt(current, 2, "CK", "CG", "CQ", ""))
  287. {
  288. MetaphAdd('K');
  289. current += 2;
  290. break;
  291. }
  292. if(StringAt(current, 2, "CI", "CE", "CY", ""))
  293. {
  294. //italian vs. english
  295. if(StringAt(current, 3, "CIO", "CIE", "CIA", ""))
  296. MetaphAdd("S", "X");
  297. else
  298. MetaphAdd('S');
  299. current += 2;
  300. break;
  301. }
  302. //else
  303. MetaphAdd('K');
  304. //name sent in 'mac caffrey', 'mac gregor
  305. if(StringAt((current + 1), 2, " C", " Q", " G", ""))
  306. current += 3;
  307. else
  308. if(StringAt((current + 1), 1, "C", "K", "Q", "")
  309. AND !StringAt((current + 1), 2, "CE", "CI", ""))
  310. current += 2;
  311. else
  312. current += 1;
  313. break;
  314. case 'D':
  315. if(StringAt(current, 2, "DG", ""))
  316. {
  317. if(StringAt((current + 2), 1, "I", "E", "Y", ""))
  318. {
  319. //e.g. 'edge'
  320. MetaphAdd('J');
  321. current += 3;
  322. break;
  323. }else{
  324. //e.g. 'edgar'
  325. MetaphAdd("TK");
  326. current += 2;
  327. break;
  328. }
  329. }
  330. if(StringAt(current, 2, "DT", "DD", ""))
  331. {
  332. MetaphAdd('T');
  333. current += 2;
  334. break;
  335. }
  336. //else
  337. MetaphAdd('T');
  338. current += 1;
  339. break;
  340. case 'F':
  341. if(GetAt(current + 1) == 'F')
  342. current += 2;
  343. else
  344. current += 1;
  345. MetaphAdd('F');
  346. break;
  347. case 'G':
  348. if(GetAt(current + 1) == 'H')
  349. {
  350. if((current > 0) AND !IsVowel(current - 1))
  351. {
  352. MetaphAdd('K');
  353. current += 2;
  354. break;
  355. }
  356. if(current < 3)
  357. {
  358. //'ghislane', ghiradelli
  359. if(current == 0)
  360. {
  361. if(GetAt(current + 2) == 'I')
  362. MetaphAdd('J');
  363. else
  364. MetaphAdd('K');
  365. current += 2;
  366. break;
  367. }
  368. }
  369. //Parker's rule (with some further refinements) - e.g., 'hugh'
  370. if(((current > 1) AND StringAt((current - 2), 1, "B", "H", "D", "") )
  371. //e.g., 'bough'
  372. OR ((current > 2) AND StringAt((current - 3), 1, "B", "H", "D", "") )
  373. //e.g., 'broughton'
  374. OR ((current > 3) AND StringAt((current - 4), 1, "B", "H", "") ) )
  375. {
  376. current += 2;
  377. break;
  378. }else{
  379. //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
  380. if((current > 2)
  381. AND (GetAt(current - 1) == 'U')
  382. AND StringAt((current - 3), 1, "C", "G", "L", "R", "T", "") )
  383. {
  384. MetaphAdd('F');
  385. }else
  386. if((current > 0) AND GetAt(current - 1) != 'I')
  387. MetaphAdd('K');
  388. current += 2;
  389. break;
  390. }
  391. }
  392. if(GetAt(current + 1) == 'N')
  393. {
  394. if((current == 1) AND IsVowel(0) AND !SlavoGermanic())
  395. {
  396. MetaphAdd("KN", "N");
  397. }else
  398. //not e.g. 'cagney'
  399. if(!StringAt((current + 2), 2, "EY", "")
  400. AND (GetAt(current + 1) != 'Y') AND !SlavoGermanic())
  401. {
  402. MetaphAdd("N", "KN");
  403. }else
  404. MetaphAdd("KN");
  405. current += 2;
  406. break;
  407. }
  408. //'tagliaro'
  409. if(StringAt((current + 1), 2, "LI", "") AND !SlavoGermanic())
  410. {
  411. MetaphAdd("KL", "L");
  412. current += 2;
  413. break;
  414. }
  415. //-ges-,-gep-,-gel-, -gie- at beginning
  416. if((current == 0)
  417. AND ((GetAt(current + 1) == 'Y')
  418. OR StringAt((current + 1), 2, "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER", "")) )
  419. {
  420. MetaphAdd("K", "J");
  421. current += 2;
  422. break;
  423. }
  424. // -ger-, -gy-
  425. if((StringAt((current + 1), 2, "ER", "") OR (GetAt(current + 1) == 'Y'))
  426. AND !StringAt(0, 6, "DANGER", "RANGER", "MANGER", "")
  427. AND !StringAt((current - 1), 1, "E", "I", "")
  428. AND !StringAt((current - 1), 3, "RGY", "OGY", "") )
  429. {
  430. MetaphAdd("K", "J");
  431. current += 2;
  432. break;
  433. }
  434. // italian e.g, 'biaggi'
  435. if(StringAt((current + 1), 1, "E", "I", "Y", "") OR StringAt((current - 1), 4, "AGGI", "OGGI", ""))
  436. {
  437. //obvious germanic
  438. if((StringAt(0, 4, "VAN ", "VON ", "") OR StringAt(0, 3, "SCH", ""))
  439. OR StringAt((current + 1), 2, "ET", ""))
  440. MetaphAdd('K');
  441. else
  442. //always soft if french ending
  443. if(StringAt((current + 1), 4, "IER ", ""))
  444. MetaphAdd('J');
  445. else
  446. MetaphAdd("J", "K");
  447. current += 2;
  448. break;
  449. }
  450. if(GetAt(current + 1) == 'G')
  451. current += 2;
  452. else
  453. current += 1;
  454. MetaphAdd('K');
  455. break;
  456. case 'H':
  457. //only keep if first & before vowel or btw. 2 vowels
  458. if(((current == 0) OR IsVowel(current - 1))
  459. AND IsVowel(current + 1))
  460. {
  461. MetaphAdd('H');
  462. current += 2;
  463. }else//also takes care of 'HH'
  464. current += 1;
  465. break;
  466. case 'J':
  467. //obvious spanish, 'jose', 'san jacinto'
  468. if(StringAt(current, 4, "JOSE", "") OR StringAt(0, 4, "SAN ", "") )
  469. {
  470. if(((current == 0) AND (GetAt(current + 4) == ' ')) OR StringAt(0, 4, "SAN ", "") )
  471. MetaphAdd('H');
  472. else
  473. {
  474. MetaphAdd("J", "H");
  475. }
  476. current +=1;
  477. break;
  478. }
  479. if((current == 0) AND !StringAt(current, 4, "JOSE", ""))
  480. MetaphAdd("J", "A");//Yankelovich/Jankelowicz
  481. else
  482. //spanish pron. of e.g. 'bajador'
  483. if(IsVowel(current - 1)
  484. AND !SlavoGermanic()
  485. AND ((GetAt(current + 1) == 'A') OR (GetAt(current + 1) == 'O')))
  486. MetaphAdd("J", "H");
  487. else
  488. if(current == last)
  489. MetaphAdd("J", " ");
  490. else
  491. if(!StringAt((current + 1), 1, "L", "T", "K", "S", "N", "M", "B", "Z", "")
  492. AND !StringAt((current - 1), 1, "S", "K", "L", ""))
  493. MetaphAdd('J');
  494. if(GetAt(current + 1) == 'J')//it could happen!
  495. current += 2;
  496. else
  497. current += 1;
  498. break;
  499. case 'K':
  500. if(GetAt(current + 1) == 'K')
  501. current += 2;
  502. else
  503. current += 1;
  504. MetaphAdd('K');
  505. break;
  506. case 'L':
  507. if(GetAt(current + 1) == 'L')
  508. {
  509. //spanish e.g. 'cabrillo', 'gallegos'
  510. if(((current == (length - 3))
  511. AND StringAt((current - 1), 4, "ILLO", "ILLA", "ALLE", ""))
  512. OR ((StringAt((last - 1), 2, "AS", "OS", "") OR StringAt(last, 1, "A", "O", ""))
  513. AND StringAt((current - 1), 4, "ALLE", "")) )
  514. {
  515. MetaphAdd("L", " ");
  516. current += 2;
  517. break;
  518. }
  519. current += 2;
  520. }else
  521. current += 1;
  522. MetaphAdd('L');
  523. break;
  524. case 'M':
  525. if((StringAt((current - 1), 3, "UMB", "")
  526. AND (((current + 1) == last) OR StringAt((current + 2), 2, "ER", "")))
  527. //'dumb','thumb'
  528. OR (GetAt(current + 1) == 'M') )
  529. current += 2;
  530. else
  531. current += 1;
  532. MetaphAdd('M');
  533. break;
  534. case 'N':
  535. if(GetAt(current + 1) == 'N')
  536. current += 2;
  537. else
  538. current += 1;
  539. MetaphAdd('N');
  540. break;
  541. case '\321': // Ascii 0xD1 = capital N with tilde
  542. current += 1;
  543. MetaphAdd('N');
  544. break;
  545. case 'P':
  546. if(GetAt(current + 1) == 'H')
  547. {
  548. MetaphAdd('F');
  549. current += 2;
  550. break;
  551. }
  552. //also account for "campbell", "raspberry"
  553. if(StringAt((current + 1), 1, "P", "B", ""))
  554. current += 2;
  555. else
  556. current += 1;
  557. MetaphAdd('P');
  558. break;
  559. case 'Q':
  560. if(GetAt(current + 1) == 'Q')
  561. current += 2;
  562. else
  563. current += 1;
  564. MetaphAdd('K');
  565. break;
  566. case 'R':
  567. //french e.g. 'rogier', but exclude 'hochmeier'
  568. if((current == last)
  569. AND !SlavoGermanic()
  570. AND StringAt((current - 2), 2, "IE", "")
  571. AND !StringAt((current - 4), 2, "ME", "MA", ""))
  572. MetaphAdd("", "R");
  573. else
  574. MetaphAdd('R');
  575. if(GetAt(current + 1) == 'R')
  576. current += 2;
  577. else
  578. current += 1;
  579. break;
  580. case 'S':
  581. //special cases 'island', 'isle', 'carlisle', 'carlysle'
  582. if(StringAt((current - 1), 3, "ISL", "YSL", ""))
  583. {
  584. current += 1;
  585. break;
  586. }
  587. //special case 'sugar-'
  588. if((current == 0) AND StringAt(current, 5, "SUGAR", ""))
  589. {
  590. MetaphAdd("X", "S");
  591. current += 1;
  592. break;
  593. }
  594. if(StringAt(current, 2, "SH", ""))
  595. {
  596. //germanic
  597. if(StringAt((current + 1), 4, "HEIM", "HOEK", "HOLM", "HOLZ", ""))
  598. MetaphAdd('S');
  599. else
  600. MetaphAdd('X');
  601. current += 2;
  602. break;
  603. }
  604. //italian & armenian
  605. if(StringAt(current, 3, "SIO", "SIA", "") OR StringAt(current, 4, "SIAN", ""))
  606. {
  607. if(!SlavoGermanic())
  608. MetaphAdd("S", "X");
  609. else
  610. MetaphAdd('S');
  611. current += 3;
  612. break;
  613. }
  614. //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
  615. //also, -sz- in slavic language altho in hungarian it is pronounced 's'
  616. if(((current == 0)
  617. AND StringAt((current + 1), 1, "M", "N", "L", "W", ""))
  618. OR StringAt((current + 1), 1, "Z", ""))
  619. {
  620. MetaphAdd("S", "X");
  621. if(StringAt((current + 1), 1, "Z", ""))
  622. current += 2;
  623. else
  624. current += 1;
  625. break;
  626. }
  627. if(StringAt(current, 2, "SC", ""))
  628. {
  629. //Schlesinger's rule
  630. if(GetAt(current + 2) == 'H')
  631. {
  632. //dutch origin, e.g. 'school', 'schooner'
  633. if(StringAt((current + 3), 2, "OO", "ER", "EN", "UY", "ED", "EM", ""))
  634. {
  635. //'schermerhorn', 'schenker'
  636. if(StringAt((current + 3), 2, "ER", "EN", ""))
  637. {
  638. MetaphAdd("X", "SK");
  639. }else
  640. MetaphAdd("SK");
  641. current += 3;
  642. break;
  643. }else{
  644. if((current == 0) AND !IsVowel(3) AND (GetAt(3) != 'W'))
  645. MetaphAdd("X", "S");
  646. else
  647. MetaphAdd('X');
  648. current += 3;
  649. break;
  650. }
  651. }
  652. if(StringAt((current + 2), 1, "I", "E", "Y", ""))
  653. {
  654. MetaphAdd('S');
  655. current += 3;
  656. break;
  657. }
  658. //else
  659. MetaphAdd("SK");
  660. current += 3;
  661. break;
  662. }
  663. //french e.g. 'resnais', 'artois'
  664. if((current == last) AND StringAt((current - 2), 2, "AI", "OI", ""))
  665. MetaphAdd("", "S");
  666. else
  667. MetaphAdd('S');
  668. if(StringAt((current + 1), 1, "S", "Z", ""))
  669. current += 2;
  670. else
  671. current += 1;
  672. break;
  673. case 'T':
  674. if(StringAt(current, 4, "TION", ""))
  675. {
  676. MetaphAdd('X');
  677. current += 3;
  678. break;
  679. }
  680. if(StringAt(current, 3, "TIA", "TCH", ""))
  681. {
  682. MetaphAdd('X');
  683. current += 3;
  684. break;
  685. }
  686. if(StringAt(current, 2, "TH", "")
  687. OR StringAt(current, 3, "TTH", ""))
  688. {
  689. //special case 'thomas', 'thames' or germanic
  690. if(StringAt((current + 2), 2, "OM", "AM", "")
  691. OR StringAt(0, 4, "VAN ", "VON ", "")
  692. OR StringAt(0, 3, "SCH", ""))
  693. {
  694. MetaphAdd('T');
  695. }else{
  696. MetaphAdd("0", "T");
  697. }
  698. current += 2;
  699. break;
  700. }
  701. if(StringAt((current + 1), 1, "T", "D", ""))
  702. current += 2;
  703. else
  704. current += 1;
  705. MetaphAdd('T');
  706. break;
  707. case 'V':
  708. if(GetAt(current + 1) == 'V')
  709. current += 2;
  710. else
  711. current += 1;
  712. MetaphAdd('F');
  713. break;
  714. case 'W':
  715. //can also be in middle of word
  716. if(StringAt(current, 2, "WR", ""))
  717. {
  718. MetaphAdd('R');
  719. current += 2;
  720. break;
  721. }
  722. if((current == 0)
  723. AND (IsVowel(current + 1) OR StringAt(current, 2, "WH", "")))
  724. {
  725. //Wasserman should match Vasserman
  726. if(IsVowel(current + 1))
  727. MetaphAdd("A", "F");
  728. else
  729. //need Uomo to match Womo
  730. MetaphAdd('A');
  731. }
  732. //Arnow should match Arnoff
  733. if(((current == last) AND IsVowel(current - 1))
  734. OR StringAt((current - 1), 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY", "")
  735. OR StringAt(0, 3, "SCH", ""))
  736. {
  737. MetaphAdd("", "F");
  738. current +=1;
  739. break;
  740. }
  741. //polish e.g. 'filipowicz'
  742. if(StringAt(current, 4, "WICZ", "WITZ", ""))
  743. {
  744. MetaphAdd("TS", "FX");
  745. current +=4;
  746. break;
  747. }
  748. //else skip it
  749. current +=1;
  750. break;
  751. case 'X':
  752. //french e.g. breaux
  753. if(!((current == last)
  754. AND (StringAt((current - 3), 3, "IAU", "EAU", "")
  755. OR StringAt((current - 2), 2, "AU", "OU", ""))) )
  756. MetaphAdd("KS");
  757. if(StringAt((current + 1), 1, "C", "X", ""))
  758. current += 2;
  759. else
  760. current += 1;
  761. break;
  762. case 'Z':
  763. //chinese pinyin e.g. 'zhao'
  764. if(GetAt(current + 1) == 'H')
  765. {
  766. MetaphAdd('J');
  767. current += 2;
  768. break;
  769. }else
  770. if(StringAt((current + 1), 2, "ZO", "ZI", "ZA", "")
  771. OR (SlavoGermanic() AND ((current > 0) AND GetAt(current - 1) != 'T')))
  772. {
  773. MetaphAdd("S", "TS");
  774. }
  775. else
  776. MetaphAdd('S');
  777. if(GetAt(current + 1) == 'Z')
  778. current += 2;
  779. else
  780. current += 1;
  781. break;
  782. default:
  783. current += 1;
  784. }
  785. }
  786. metaph = primary.Ptr;
  787. //only give back 4 char metaph
  788. //if(metaph.Len > 4)
  789. // metaph.SetAt(4,'\0');
  790. metaph2 = secondary.Ptr;
  791. //if(metaph2.Len > 4)
  792. // metaph2.SetAt(4,'\0');
  793. }
  794. }//namespace