// metaphone.cpp
  1. #include "platform.h"
  2. #include <time.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include "metaphone.h"
  6. ////////////////////////////////////////////////////////////////////////////////
  7. // Double Metaphone (c) 1998, 1999 by Lawrence Philips
  8. //
  9. // Slightly modified by Kevin Atkinson to fix several bugs and
  10. // to allow it to give back more than 4 characters.
  11. //
  12. // 13-Dec-00 mtw Modified to return a number (e.g. 77th returns 77)
  13. //
  14. // Placed in the public domain by Lawrence Philips
  15. //
  16. ////////////////////////////////////////////////////////////////////////////////
  17. #include "metaphone.h"
  18. #include <ctype.h>
  19. #define AND &&
  20. #define OR ||
  21. namespace nsDmetaphone {
  22. ////////////////////////////////////////////////////////////////////////////////
  23. //
  24. ////////////////////////////////////////////////////////////////////////////////
  25. MString::MString()
  26. {
  27. }
  28. ////////////////////////////////////////////////////////////////////////////////
  29. //
  30. ////////////////////////////////////////////////////////////////////////////////
  31. MString::MString(const char* in) : cString(in)
  32. {
  33. }
  34. ////////////////////////////////////////////////////////////////////////////////
  35. //
  36. ////////////////////////////////////////////////////////////////////////////////
  37. MString::MString(const cString& in) : cString(in)
  38. {
  39. }
  40. ////////////////////////////////////////////////////////////////////////////////
  41. //
  42. ////////////////////////////////////////////////////////////////////////////////
  43. bool MString::SlavoGermanic()
  44. {
  45. return (Find('W') OR Find('K') OR Find("CZ") OR Find("WITZ"));
  46. }
  47. ////////////////////////////////////////////////////////////////////////////////
  48. //
  49. ////////////////////////////////////////////////////////////////////////////////
  50. inline void MString::MetaphAdd(const char* main)
  51. {
  52. primary.Cat(main);
  53. secondary.Cat(main);
  54. }
  55. ////////////////////////////////////////////////////////////////////////////////
  56. //
  57. ////////////////////////////////////////////////////////////////////////////////
  58. inline void MString::MetaphAdd(const char main)
  59. {
  60. primary += main;
  61. secondary += main;
  62. }
  63. ////////////////////////////////////////////////////////////////////////////////
  64. //
  65. ////////////////////////////////////////////////////////////////////////////////
  66. inline void MString::MetaphAdd(const char* main, const char* alt)
  67. {
  68. if(*main)
  69. primary.Cat(main);
  70. if(*alt)
  71. {
  72. alternate = true;
  73. if(alt[0] != ' ')
  74. secondary.Cat(alt);
  75. }else
  76. if(*main AND (main[0] != ' '))
  77. secondary.Cat(main);
  78. }
  79. ////////////////////////////////////////////////////////////////////////////////
  80. //
  81. ////////////////////////////////////////////////////////////////////////////////
  82. bool MString::IsVowel(int at)
  83. {
  84. if((at < 0) OR (at >= length))
  85. return false;
  86. char it = GetAt(at);
  87. if((it == 'A') OR (it == 'E') OR (it == 'I') OR (it == 'O') OR (it == 'U') OR (it == 'Y') )
  88. return true;
  89. return false;
  90. }
  91. ////////////////////////////////////////////////////////////////////////////////
  92. //
  93. ////////////////////////////////////////////////////////////////////////////////
  94. bool MString::StringAt(int start, int len, ... )
  95. {
  96. if (start < 0) return false;
  97. char target[64];
  98. char* test;
  99. if (Len - start < len)
  100. {
  101. return false;
  102. }
  103. memcpy( target, Ptr + start, len );
  104. target[len] = 0;
  105. va_list sstrings;
  106. va_start(sstrings, len);
  107. do
  108. {
  109. test = va_arg(sstrings, char*);
  110. if(*test AND (strcmp(target, test) == 0))
  111. return true;
  112. }while(strcmp(test, ""));
  113. va_end(sstrings);
  114. return false;
  115. }
  116. ////////////////////////////////////////////////////////////////////////////////
  117. // main deal
  118. ////////////////////////////////////////////////////////////////////////////////
  119. void MString::DoubleMetaphone(cString &metaph, cString &metaph2)
  120. {
  121. int current = 0;
  122. length = Len;
  123. if(length < 1)
  124. return;
  125. last = length - 1;//zero based index
  126. alternate = false;
  127. primary = "";
  128. secondary = "";
  129. Upper();
  130. //pad the original string so that we can index beyond the edge of the world
  131. Cat(" ");
  132. //skip these when at start of word
  133. if(StringAt(0, 2, "GN", "KN", "PN", "WR", "PS", ""))
  134. current += 1;
  135. //Initial 'X' is pronounced 'Z' e.g. 'Xavier'
  136. if(GetAt(0) == 'X')
  137. {
  138. MetaphAdd('S'); //'Z' maps to 'S'
  139. current += 1;
  140. }
  141. if (isdigit(GetAt(0)))
  142. {
  143. while (isdigit(GetAt(current)) && current < length)
  144. {
  145. MetaphAdd(GetAt(current));
  146. current++;
  147. }
  148. }
  149. else while(true OR (primary.Len < 4) OR (secondary.Len < 4))
  150. ///////////main loop//////////////////////////
  151. {
  152. if(current >= length)
  153. break;
  154. switch(GetAt(current))
  155. {
  156. case 'A':
  157. case 'E':
  158. case 'I':
  159. case 'O':
  160. case 'U':
  161. case 'Y':
  162. if(current == 0)
  163. //all init vowels now map to 'A'
  164. MetaphAdd('A');
  165. current +=1;
  166. break;
  167. case 'B':
  168. //"-mb", e.g", "dumb", already skipped over...
  169. MetaphAdd('P');
  170. if(GetAt(current + 1) == 'B')
  171. current +=2;
  172. else
  173. current +=1;
  174. break;
  175. case '\307': // ascii 0xc7 = C with cedilla
  176. MetaphAdd('S');
  177. current += 1;
  178. break;
  179. case 'C':
  180. //various germanic
  181. if((current > 1)
  182. AND !IsVowel(current - 2)
  183. AND StringAt((current - 1), 3, "ACH", "")
  184. AND ((GetAt(current + 2) != 'I') AND ((GetAt(current + 2) != 'E')
  185. OR StringAt((current - 2), 6, "BACHER", "MACHER", "")) ))
  186. {
  187. MetaphAdd('K');
  188. current +=2;
  189. break;
  190. }
  191. //special case 'caesar'
  192. if((current == 0) AND StringAt(current, 6, "CAESAR", ""))
  193. {
  194. MetaphAdd('S');
  195. current +=2;
  196. break;
  197. }
  198. //italian 'chianti'
  199. if(StringAt(current, 4, "CHIA", ""))
  200. {
  201. MetaphAdd('K');
  202. current +=2;
  203. break;
  204. }
  205. if(StringAt(current, 2, "CH", ""))
  206. {
  207. //find 'michael'
  208. if((current > 0) AND StringAt(current, 4, "CHAE", ""))
  209. {
  210. MetaphAdd("K", "X");
  211. current +=2;
  212. break;
  213. }
  214. //greek roots e.g. 'chemistry', 'chorus'
  215. if((current == 0)
  216. AND (StringAt((current + 1), 5, "HARAC", "HARIS", "")
  217. OR StringAt((current + 1), 3, "HOR", "HYM", "HIA", "HEM", ""))
  218. AND !StringAt(0, 5, "CHORE", ""))
  219. {
  220. MetaphAdd('K');
  221. current +=2;
  222. break;
  223. }
  224. //germanic, greek, or otherwise 'ch' for 'kh' sound
  225. if((StringAt(0, 4, "VAN ", "VON ", "") OR StringAt(0, 3, "SCH", ""))
  226. // 'architect but not 'arch', 'orchestra', 'orchid'
  227. OR StringAt((current - 2), 6, "ORCHES", "ARCHIT", "ORCHID", "")
  228. OR StringAt((current + 2), 1, "T", "S", "")
  229. OR ((StringAt((current - 1), 1, "A", "O", "U", "E", "") OR (current == 0))
  230. //e.g., 'wachtler', 'wechsler', but not 'tichner'
  231. AND StringAt((current + 2), 1, "L", "R", "N", "M", "B", "H", "F", "V", "W", " ", "")))
  232. {
  233. MetaphAdd('K');
  234. }else{
  235. if(current > 0)
  236. {
  237. if(StringAt(0, 2, "MC", ""))
  238. //e.g., "McHugh"
  239. MetaphAdd('K');
  240. else
  241. MetaphAdd("X", "K");
  242. }else
  243. MetaphAdd('X');
  244. }
  245. current +=2;
  246. break;
  247. }
  248. //e.g, 'czerny'
  249. if(StringAt(current, 2, "CZ", "") AND !StringAt((current - 2), 4, "WICZ", ""))
  250. {
  251. MetaphAdd("S", "X");
  252. current += 2;
  253. break;
  254. }
  255. //e.g., 'focaccia'
  256. if(StringAt((current + 1), 3, "CIA", ""))
  257. {
  258. MetaphAdd('X');
  259. current += 3;
  260. break;
  261. }
  262. //double 'C', but not if e.g. 'McClellan'
  263. if(StringAt(current, 2, "CC", "") AND !((current == 1) AND (GetAt(0) == 'M')))
  264. {
  265. //'bellocchio' but not 'bacchus'
  266. if(StringAt((current + 2), 1, "I", "E", "H", "") AND !StringAt((current + 2), 2, "HU", ""))
  267. {
  268. //'accident', 'accede' 'succeed'
  269. if(((current == 1) AND (GetAt(current - 1) == 'A'))
  270. OR StringAt((current - 1), 5, "UCCEE", "UCCES", ""))
  271. MetaphAdd("KS");
  272. //'bacci', 'bertucci', other italian
  273. else
  274. MetaphAdd('X');
  275. current += 3;
  276. break;
  277. }else{//Pierce's rule
  278. MetaphAdd('K');
  279. current += 2;
  280. break;
  281. }
  282. }
  283. if(StringAt(current, 2, "CK", "CG", "CQ", ""))
  284. {
  285. MetaphAdd('K');
  286. current += 2;
  287. break;
  288. }
  289. if(StringAt(current, 2, "CI", "CE", "CY", ""))
  290. {
  291. //italian vs. english
  292. if(StringAt(current, 3, "CIO", "CIE", "CIA", ""))
  293. MetaphAdd("S", "X");
  294. else
  295. MetaphAdd('S');
  296. current += 2;
  297. break;
  298. }
  299. //else
  300. MetaphAdd('K');
  301. //name sent in 'mac caffrey', 'mac gregor
  302. if(StringAt((current + 1), 2, " C", " Q", " G", ""))
  303. current += 3;
  304. else
  305. if(StringAt((current + 1), 1, "C", "K", "Q", "")
  306. AND !StringAt((current + 1), 2, "CE", "CI", ""))
  307. current += 2;
  308. else
  309. current += 1;
  310. break;
  311. case 'D':
  312. if(StringAt(current, 2, "DG", ""))
  313. {
  314. if(StringAt((current + 2), 1, "I", "E", "Y", ""))
  315. {
  316. //e.g. 'edge'
  317. MetaphAdd('J');
  318. current += 3;
  319. break;
  320. }else{
  321. //e.g. 'edgar'
  322. MetaphAdd("TK");
  323. current += 2;
  324. break;
  325. }
  326. }
  327. if(StringAt(current, 2, "DT", "DD", ""))
  328. {
  329. MetaphAdd('T');
  330. current += 2;
  331. break;
  332. }
  333. //else
  334. MetaphAdd('T');
  335. current += 1;
  336. break;
  337. case 'F':
  338. if(GetAt(current + 1) == 'F')
  339. current += 2;
  340. else
  341. current += 1;
  342. MetaphAdd('F');
  343. break;
  344. case 'G':
  345. if(GetAt(current + 1) == 'H')
  346. {
  347. if((current > 0) AND !IsVowel(current - 1))
  348. {
  349. MetaphAdd('K');
  350. current += 2;
  351. break;
  352. }
  353. if(current < 3)
  354. {
  355. //'ghislane', ghiradelli
  356. if(current == 0)
  357. {
  358. if(GetAt(current + 2) == 'I')
  359. MetaphAdd('J');
  360. else
  361. MetaphAdd('K');
  362. current += 2;
  363. break;
  364. }
  365. }
  366. //Parker's rule (with some further refinements) - e.g., 'hugh'
  367. if(((current > 1) AND StringAt((current - 2), 1, "B", "H", "D", "") )
  368. //e.g., 'bough'
  369. OR ((current > 2) AND StringAt((current - 3), 1, "B", "H", "D", "") )
  370. //e.g., 'broughton'
  371. OR ((current > 3) AND StringAt((current - 4), 1, "B", "H", "") ) )
  372. {
  373. current += 2;
  374. break;
  375. }else{
  376. //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
  377. if((current > 2)
  378. AND (GetAt(current - 1) == 'U')
  379. AND StringAt((current - 3), 1, "C", "G", "L", "R", "T", "") )
  380. {
  381. MetaphAdd('F');
  382. }else
  383. if((current > 0) AND GetAt(current - 1) != 'I')
  384. MetaphAdd('K');
  385. current += 2;
  386. break;
  387. }
  388. }
  389. if(GetAt(current + 1) == 'N')
  390. {
  391. if((current == 1) AND IsVowel(0) AND !SlavoGermanic())
  392. {
  393. MetaphAdd("KN", "N");
  394. }else
  395. //not e.g. 'cagney'
  396. if(!StringAt((current + 2), 2, "EY", "")
  397. AND (GetAt(current + 1) != 'Y') AND !SlavoGermanic())
  398. {
  399. MetaphAdd("N", "KN");
  400. }else
  401. MetaphAdd("KN");
  402. current += 2;
  403. break;
  404. }
  405. //'tagliaro'
  406. if(StringAt((current + 1), 2, "LI", "") AND !SlavoGermanic())
  407. {
  408. MetaphAdd("KL", "L");
  409. current += 2;
  410. break;
  411. }
  412. //-ges-,-gep-,-gel-, -gie- at beginning
  413. if((current == 0)
  414. AND ((GetAt(current + 1) == 'Y')
  415. OR StringAt((current + 1), 2, "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER", "")) )
  416. {
  417. MetaphAdd("K", "J");
  418. current += 2;
  419. break;
  420. }
  421. // -ger-, -gy-
  422. if((StringAt((current + 1), 2, "ER", "") OR (GetAt(current + 1) == 'Y'))
  423. AND !StringAt(0, 6, "DANGER", "RANGER", "MANGER", "")
  424. AND !StringAt((current - 1), 1, "E", "I", "")
  425. AND !StringAt((current - 1), 3, "RGY", "OGY", "") )
  426. {
  427. MetaphAdd("K", "J");
  428. current += 2;
  429. break;
  430. }
  431. // italian e.g, 'biaggi'
  432. if(StringAt((current + 1), 1, "E", "I", "Y", "") OR StringAt((current - 1), 4, "AGGI", "OGGI", ""))
  433. {
  434. //obvious germanic
  435. if((StringAt(0, 4, "VAN ", "VON ", "") OR StringAt(0, 3, "SCH", ""))
  436. OR StringAt((current + 1), 2, "ET", ""))
  437. MetaphAdd('K');
  438. else
  439. //always soft if french ending
  440. if(StringAt((current + 1), 4, "IER ", ""))
  441. MetaphAdd('J');
  442. else
  443. MetaphAdd("J", "K");
  444. current += 2;
  445. break;
  446. }
  447. if(GetAt(current + 1) == 'G')
  448. current += 2;
  449. else
  450. current += 1;
  451. MetaphAdd('K');
  452. break;
  453. case 'H':
  454. //only keep if first & before vowel or btw. 2 vowels
  455. if(((current == 0) OR IsVowel(current - 1))
  456. AND IsVowel(current + 1))
  457. {
  458. MetaphAdd('H');
  459. current += 2;
  460. }else//also takes care of 'HH'
  461. current += 1;
  462. break;
  463. case 'J':
  464. //obvious spanish, 'jose', 'san jacinto'
  465. if(StringAt(current, 4, "JOSE", "") OR StringAt(0, 4, "SAN ", "") )
  466. {
  467. if(((current == 0) AND (GetAt(current + 4) == ' ')) OR StringAt(0, 4, "SAN ", "") )
  468. MetaphAdd('H');
  469. else
  470. {
  471. MetaphAdd("J", "H");
  472. }
  473. current +=1;
  474. break;
  475. }
  476. if((current == 0) AND !StringAt(current, 4, "JOSE", ""))
  477. MetaphAdd("J", "A");//Yankelovich/Jankelowicz
  478. else
  479. //spanish pron. of e.g. 'bajador'
  480. if(IsVowel(current - 1)
  481. AND !SlavoGermanic()
  482. AND ((GetAt(current + 1) == 'A') OR (GetAt(current + 1) == 'O')))
  483. MetaphAdd("J", "H");
  484. else
  485. if(current == last)
  486. MetaphAdd("J", " ");
  487. else
  488. if(!StringAt((current + 1), 1, "L", "T", "K", "S", "N", "M", "B", "Z", "")
  489. AND !StringAt((current - 1), 1, "S", "K", "L", ""))
  490. MetaphAdd('J');
  491. if(GetAt(current + 1) == 'J')//it could happen!
  492. current += 2;
  493. else
  494. current += 1;
  495. break;
  496. case 'K':
  497. if(GetAt(current + 1) == 'K')
  498. current += 2;
  499. else
  500. current += 1;
  501. MetaphAdd('K');
  502. break;
  503. case 'L':
  504. if(GetAt(current + 1) == 'L')
  505. {
  506. //spanish e.g. 'cabrillo', 'gallegos'
  507. if(((current == (length - 3))
  508. AND StringAt((current - 1), 4, "ILLO", "ILLA", "ALLE", ""))
  509. OR ((StringAt((last - 1), 2, "AS", "OS", "") OR StringAt(last, 1, "A", "O", ""))
  510. AND StringAt((current - 1), 4, "ALLE", "")) )
  511. {
  512. MetaphAdd("L", " ");
  513. current += 2;
  514. break;
  515. }
  516. current += 2;
  517. }else
  518. current += 1;
  519. MetaphAdd('L');
  520. break;
  521. case 'M':
  522. if((StringAt((current - 1), 3, "UMB", "")
  523. AND (((current + 1) == last) OR StringAt((current + 2), 2, "ER", "")))
  524. //'dumb','thumb'
  525. OR (GetAt(current + 1) == 'M') )
  526. current += 2;
  527. else
  528. current += 1;
  529. MetaphAdd('M');
  530. break;
  531. case 'N':
  532. if(GetAt(current + 1) == 'N')
  533. current += 2;
  534. else
  535. current += 1;
  536. MetaphAdd('N');
  537. break;
  538. case '\321': // Ascii 0xD1 = capital N with tilde
  539. current += 1;
  540. MetaphAdd('N');
  541. break;
  542. case 'P':
  543. if(GetAt(current + 1) == 'H')
  544. {
  545. MetaphAdd('F');
  546. current += 2;
  547. break;
  548. }
  549. //also account for "campbell", "raspberry"
  550. if(StringAt((current + 1), 1, "P", "B", ""))
  551. current += 2;
  552. else
  553. current += 1;
  554. MetaphAdd('P');
  555. break;
  556. case 'Q':
  557. if(GetAt(current + 1) == 'Q')
  558. current += 2;
  559. else
  560. current += 1;
  561. MetaphAdd('K');
  562. break;
  563. case 'R':
  564. //french e.g. 'rogier', but exclude 'hochmeier'
  565. if((current == last)
  566. AND !SlavoGermanic()
  567. AND StringAt((current - 2), 2, "IE", "")
  568. AND !StringAt((current - 4), 2, "ME", "MA", ""))
  569. MetaphAdd("", "R");
  570. else
  571. MetaphAdd('R');
  572. if(GetAt(current + 1) == 'R')
  573. current += 2;
  574. else
  575. current += 1;
  576. break;
  577. case 'S':
  578. //special cases 'island', 'isle', 'carlisle', 'carlysle'
  579. if(StringAt((current - 1), 3, "ISL", "YSL", ""))
  580. {
  581. current += 1;
  582. break;
  583. }
  584. //special case 'sugar-'
  585. if((current == 0) AND StringAt(current, 5, "SUGAR", ""))
  586. {
  587. MetaphAdd("X", "S");
  588. current += 1;
  589. break;
  590. }
  591. if(StringAt(current, 2, "SH", ""))
  592. {
  593. //germanic
  594. if(StringAt((current + 1), 4, "HEIM", "HOEK", "HOLM", "HOLZ", ""))
  595. MetaphAdd('S');
  596. else
  597. MetaphAdd('X');
  598. current += 2;
  599. break;
  600. }
  601. //italian & armenian
  602. if(StringAt(current, 3, "SIO", "SIA", "") OR StringAt(current, 4, "SIAN", ""))
  603. {
  604. if(!SlavoGermanic())
  605. MetaphAdd("S", "X");
  606. else
  607. MetaphAdd('S');
  608. current += 3;
  609. break;
  610. }
  611. //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
  612. //also, -sz- in slavic language altho in hungarian it is pronounced 's'
  613. if(((current == 0)
  614. AND StringAt((current + 1), 1, "M", "N", "L", "W", ""))
  615. OR StringAt((current + 1), 1, "Z", ""))
  616. {
  617. MetaphAdd("S", "X");
  618. if(StringAt((current + 1), 1, "Z", ""))
  619. current += 2;
  620. else
  621. current += 1;
  622. break;
  623. }
  624. if(StringAt(current, 2, "SC", ""))
  625. {
  626. //Schlesinger's rule
  627. if(GetAt(current + 2) == 'H')
  628. {
  629. //dutch origin, e.g. 'school', 'schooner'
  630. if(StringAt((current + 3), 2, "OO", "ER", "EN", "UY", "ED", "EM", ""))
  631. {
  632. //'schermerhorn', 'schenker'
  633. if(StringAt((current + 3), 2, "ER", "EN", ""))
  634. {
  635. MetaphAdd("X", "SK");
  636. }else
  637. MetaphAdd("SK");
  638. current += 3;
  639. break;
  640. }else{
  641. if((current == 0) AND !IsVowel(3) AND (GetAt(3) != 'W'))
  642. MetaphAdd("X", "S");
  643. else
  644. MetaphAdd('X');
  645. current += 3;
  646. break;
  647. }
  648. }
  649. if(StringAt((current + 2), 1, "I", "E", "Y", ""))
  650. {
  651. MetaphAdd('S');
  652. current += 3;
  653. break;
  654. }
  655. //else
  656. MetaphAdd("SK");
  657. current += 3;
  658. break;
  659. }
  660. //french e.g. 'resnais', 'artois'
  661. if((current == last) AND StringAt((current - 2), 2, "AI", "OI", ""))
  662. MetaphAdd("", "S");
  663. else
  664. MetaphAdd('S');
  665. if(StringAt((current + 1), 1, "S", "Z", ""))
  666. current += 2;
  667. else
  668. current += 1;
  669. break;
  670. case 'T':
  671. if(StringAt(current, 4, "TION", ""))
  672. {
  673. MetaphAdd('X');
  674. current += 3;
  675. break;
  676. }
  677. if(StringAt(current, 3, "TIA", "TCH", ""))
  678. {
  679. MetaphAdd('X');
  680. current += 3;
  681. break;
  682. }
  683. if(StringAt(current, 2, "TH", "")
  684. OR StringAt(current, 3, "TTH", ""))
  685. {
  686. //special case 'thomas', 'thames' or germanic
  687. if(StringAt((current + 2), 2, "OM", "AM", "")
  688. OR StringAt(0, 4, "VAN ", "VON ", "")
  689. OR StringAt(0, 3, "SCH", ""))
  690. {
  691. MetaphAdd('T');
  692. }else{
  693. MetaphAdd("0", "T");
  694. }
  695. current += 2;
  696. break;
  697. }
  698. if(StringAt((current + 1), 1, "T", "D", ""))
  699. current += 2;
  700. else
  701. current += 1;
  702. MetaphAdd('T');
  703. break;
  704. case 'V':
  705. if(GetAt(current + 1) == 'V')
  706. current += 2;
  707. else
  708. current += 1;
  709. MetaphAdd('F');
  710. break;
  711. case 'W':
  712. //can also be in middle of word
  713. if(StringAt(current, 2, "WR", ""))
  714. {
  715. MetaphAdd('R');
  716. current += 2;
  717. break;
  718. }
  719. if((current == 0)
  720. AND (IsVowel(current + 1) OR StringAt(current, 2, "WH", "")))
  721. {
  722. //Wasserman should match Vasserman
  723. if(IsVowel(current + 1))
  724. MetaphAdd("A", "F");
  725. else
  726. //need Uomo to match Womo
  727. MetaphAdd('A');
  728. }
  729. //Arnow should match Arnoff
  730. if(((current == last) AND IsVowel(current - 1))
  731. OR StringAt((current - 1), 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY", "")
  732. OR StringAt(0, 3, "SCH", ""))
  733. {
  734. MetaphAdd("", "F");
  735. current +=1;
  736. break;
  737. }
  738. //polish e.g. 'filipowicz'
  739. if(StringAt(current, 4, "WICZ", "WITZ", ""))
  740. {
  741. MetaphAdd("TS", "FX");
  742. current +=4;
  743. break;
  744. }
  745. //else skip it
  746. current +=1;
  747. break;
  748. case 'X':
  749. //french e.g. breaux
  750. if(!((current == last)
  751. AND (StringAt((current - 3), 3, "IAU", "EAU", "")
  752. OR StringAt((current - 2), 2, "AU", "OU", ""))) )
  753. MetaphAdd("KS");
  754. if(StringAt((current + 1), 1, "C", "X", ""))
  755. current += 2;
  756. else
  757. current += 1;
  758. break;
  759. case 'Z':
  760. //chinese pinyin e.g. 'zhao'
  761. if(GetAt(current + 1) == 'H')
  762. {
  763. MetaphAdd('J');
  764. current += 2;
  765. break;
  766. }else
  767. if(StringAt((current + 1), 2, "ZO", "ZI", "ZA", "")
  768. OR (SlavoGermanic() AND ((current > 0) AND GetAt(current - 1) != 'T')))
  769. {
  770. MetaphAdd("S", "TS");
  771. }
  772. else
  773. MetaphAdd('S');
  774. if(GetAt(current + 1) == 'Z')
  775. current += 2;
  776. else
  777. current += 1;
  778. break;
  779. default:
  780. current += 1;
  781. }
  782. }
  783. metaph = primary.Ptr;
  784. //only give back 4 char metaph
  785. //if(metaph.Len > 4)
  786. // metaph.SetAt(4,'\0');
  787. metaph2 = secondary.Ptr;
  788. //if(metaph2.Len > 4)
  789. // metaph2.SetAt(4,'\0');
  790. }
  791. }//namespace