/*##############################################################################
## HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.  All rights reserved.
############################################################################## */

IMPORT lib_unicodelib;

EXPORT Uni := MODULE

/**
 * Strips from the source every character that also occurs in the filter string.
 *
 * @param src           The string to be filtered.
 * @param filter        The set of characters to drop from src.
 * @return              src with every character found in filter removed.
 * @see                 Std.Uni.Filter
 */

EXPORT UNICODE FilterOut(UNICODE src, UNICODE filter) :=
  lib_unicodelib.UnicodeLib.UnicodeFilterOut(src, filter);

/**
 * Keeps only those characters of the source that also occur in the filter string.
 *
 * @param src           The string to be filtered.
 * @param filter        The set of characters that are allowed to remain.
 * @return              src with every character not found in filter removed.
 * @see                 Std.Uni.FilterOut
 */

EXPORT UNICODE Filter(UNICODE src, UNICODE filter) :=
  lib_unicodelib.UnicodeLib.UnicodeFilter(src, filter);

/**
 * Replaces every character of the source that occurs in the filter string with the
 * replacement character.
 * MORE: Should this be a general string substitution?
 *
 * @param src           The string to be transformed.
 * @param filter        The set of characters that are to be replaced.
 * @param replace_char  The character substituted for each matching character.
 * @return              src with each character found in filter replaced.
 * @see                 Std.Uni.SubstituteExcluded
 */

EXPORT UNICODE SubstituteIncluded(UNICODE src, UNICODE filter, UNICODE replace_char) :=
  lib_unicodelib.UnicodeLib.UnicodeSubstituteOut(src, filter, replace_char);

/**
 * Replaces every character of the source that does NOT occur in the filter string with
 * the replacement character.
 * MORE: Should this be a general string substitution?
 *
 * @param src           The string to be transformed.
 * @param filter        The set of characters that are to be left untouched.
 * @param replace_char  The character substituted for each non-matching character.
 * @return              src with each character not found in filter replaced.
 * @see                 Std.Uni.SubstituteIncluded
 */

EXPORT UNICODE SubstituteExcluded(UNICODE src, UNICODE filter, UNICODE replace_char) :=
  lib_unicodelib.UnicodeLib.UnicodeSubstitute(src, filter, replace_char);

/**
 * Returns the character position of the nth match of the search string within the first string.
 * If no match is found the attribute returns 0.
 * If instance is omitted the position of the first instance is returned.
 *
 * @param src           The string that is searched.
 * @param sought        The string being sought.
 * @param instance      Which match instance are we interested in?  Defaults to 1.
 * @return              The 1-based position of the requested match, or 0 if there is none.
 */

EXPORT UNSIGNED4 Find(UNICODE src, UNICODE sought, UNSIGNED4 instance = 1) :=
  lib_unicodelib.UnicodeLib.UnicodeFind(src, sought, instance);

/**
 * Tests whether the source string contains the supplied word as a whole word, i.e. with a
 * regular-expression word boundary on both sides.
 *
 * @param src           The string that is being tested.
 * @param word          The word to be searched for.
 * @param ignore_case   Whether to ignore differences in case between characters.
 * @return              TRUE if the word occurs in src delimited by word boundaries.
 */

EXPORT BOOLEAN FindWord(UNICODE src, UNICODE word, BOOLEAN ignore_case=FALSE) := FUNCTION
    // NOTE(review): word is spliced into the pattern unescaped, so any regex
    // metacharacters it contains are presumably interpreted as regex syntax - confirm
    // whether callers rely on that before changing it.
    UNICODE wholeWordPattern := u'\\b' + word + u'\\b';
    RETURN IF(ignore_case,
              REGEXFIND(wholeWordPattern, src, NOCASE),
              REGEXFIND(wholeWordPattern, src));
END;

/**
 * Locale-aware variant of Find: returns the character position of the nth match of the
 * search string within the first string.
 * If no match is found the attribute returns 0.
 * Unlike the documentation previously suggested, instance has no default in this signature
 * and must always be supplied (pass 1 for the first match).
 *
 * @param src           The string that is searched.
 * @param sought        The string being sought.
 * @param instance      Which match instance are we interested in?
 * @param locale_name   The locale to use for the comparison.
 * @return              The position of the requested match, or 0 if there is none.
 */

EXPORT UNSIGNED4 LocaleFind(UNICODE src, UNICODE sought, UNSIGNED4 instance, VARSTRING locale_name) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleFind(src, sought, instance, locale_name);

/**
 * Locale-aware search at a chosen comparison strength: returns the character position of
 * the nth match of the search string within the first string.
 * If no match is found the attribute returns 0.
 * instance has no default in this signature and must always be supplied.
 *
 * @param src           The string that is searched.
 * @param tofind        The string being sought.
 * @param instance      Which match instance are we interested in?
 * @param locale_name   The locale to use for the comparison.
 * @param strength      The strength of the comparison:
 *                      1 ignores accents and case, differentiating only between letters
 *                      2 ignores case but differentiates between accents
 *                      3 differentiates between accents and case but ignores e.g. differences between Hiragana and Katakana
 *                      4 differentiates between accents and case and e.g. Hiragana/Katakana, but ignores e.g. Hebrew cantillation marks
 *                      5 differentiates between all strings whose canonically decomposed forms (NFD - Normalization Form D) are non-identical
 * @return              The position of the requested match, or 0 if there is none.
 */

EXPORT UNSIGNED4 LocaleFindAtStrength(UNICODE src, UNICODE tofind, UNSIGNED4 instance, VARSTRING locale_name, INTEGER1 strength) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleFindAtStrength(src, tofind, instance, locale_name, strength);

/**
 * Picks the nth element out of a comma separated string.
 *
 * @param src           The string containing the comma separated list.
 * @param instance      Which item to select from the list.
 * @return              The selected list element.
 */

EXPORT UNICODE Extract(UNICODE src, UNSIGNED4 instance) :=
  lib_unicodelib.UnicodeLib.UnicodeExtract(src, instance);

/**
 * Converts all upper case characters of the argument to lower case.
 *
 * @param src           The string that is being converted.
 * @return              The lower case form of src.
 */

EXPORT UNICODE ToLowerCase(UNICODE src) :=
  lib_unicodelib.UnicodeLib.UnicodeToLowerCase(src);

/**
 * Converts all lower case characters of the argument to upper case.
 *
 * @param src           The string that is being converted.
 * @return              The upper case form of src.
 */

EXPORT UNICODE ToUpperCase(UNICODE src) :=
  lib_unicodelib.UnicodeLib.UnicodeToUpperCase(src);

/**
 * Returns the title (proper) case variant of the string.
 * This overload takes no locale; see LocaleToTitleCase for the locale-specific form.
 *
 * @param src           The string that is being converted.
 * @return              The title case form of src.
 * @see                 Std.Uni.LocaleToTitleCase
 */

EXPORT UNICODE ToTitleCase(UNICODE src) :=
  lib_unicodelib.UnicodeLib.UnicodeToProperCase(src);

/**
 * Lower-cases the string according to the casing rules of a particular locale.
 *
 * @param src           The string that is being converted.
 * @param locale_name   The locale whose rules are applied.
 * @return              The lower case form of src.
 */

EXPORT UNICODE LocaleToLowerCase(UNICODE src, VARSTRING locale_name) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleToLowerCase(src, locale_name);

/**
 * Upper-cases the string according to the casing rules of a particular locale.
 *
 * @param src           The string that is being converted.
 * @param locale_name   The locale whose rules are applied.
 * @return              The upper case form of src.
 */

EXPORT UNICODE LocaleToUpperCase(UNICODE src, VARSTRING locale_name) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleToUpperCase(src, locale_name);

/**
 * Returns the title (proper) case variant of the string using the rules for a particular locale.
 *
 * @param src           The string that is being converted.
 * @param locale_name   The locale whose rules are applied.
 * @return              The title case form of src.
 */

EXPORT UNICODE LocaleToTitleCase(UNICODE src, VARSTRING locale_name) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleToProperCase(src, locale_name);

/**
 * Case-insensitive comparison of two strings.  Equivalent to comparing at strength 2.
 *
 * @param src1          The first string to be compared.
 * @param src2          The second string to be compared.
 * @return              An INTEGER4 comparison result; 0 when the strings compare equal.
 * @see                 Std.Uni.CompareAtStrength
 */

EXPORT INTEGER4 CompareIgnoreCase(UNICODE src1, UNICODE src2) :=
  lib_unicodelib.UnicodeLib.UnicodeCompareIgnoreCase(src1, src2);

/**
 * Compares the two strings at the requested comparison strength.
 *
 * @param src1          The first string to be compared.
 * @param src2          The second string to be compared.
 * @param strength      The strength of the comparison:
 *                      1 ignores accents and case, differentiating only between letters
 *                      2 ignores case but differentiates between accents
 *                      3 differentiates between accents and case but ignores e.g. differences between Hiragana and Katakana
 *                      4 differentiates between accents and case and e.g. Hiragana/Katakana, but ignores e.g. Hebrew cantillation marks
 *                      5 differentiates between all strings whose canonically decomposed forms (NFD - Normalization Form D) are non-identical
 * @return              An INTEGER4 comparison result; 0 when the strings compare equal.
 * @see                 Std.Uni.CompareIgnoreCase
 */

EXPORT INTEGER4 CompareAtStrength(UNICODE src1, UNICODE src2, INTEGER1 strength) :=
  lib_unicodelib.UnicodeLib.UnicodeCompareAtStrength(src1, src2, strength);

/**
 * Case-insensitive comparison of two strings using the rules of a particular locale.
 * Equivalent to comparing at strength 2.
 *
 * @param src1          The first string to be compared.
 * @param src2          The second string to be compared.
 * @param locale_name   The locale to use for the comparison.
 * @return              An INTEGER4 comparison result; 0 when the strings compare equal.
 * @see                 Std.Uni.LocaleCompareAtStrength
 */

EXPORT INTEGER4 LocaleCompareIgnoreCase(UNICODE src1, UNICODE src2, VARSTRING locale_name) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleCompareIgnoreCase(src1, src2, locale_name);

/**
 * Compares the two strings at the requested comparison strength using the rules of a
 * particular locale.
 *
 * @param src1          The first string to be compared.
 * @param src2          The second string to be compared.
 * @param locale_name   The locale to use for the comparison.
 * @param strength      The strength of the comparison:
 *                      1 ignores accents and case, differentiating only between letters
 *                      2 ignores case but differentiates between accents
 *                      3 differentiates between accents and case but ignores e.g. differences between Hiragana and Katakana
 *                      4 differentiates between accents and case and e.g. Hiragana/Katakana, but ignores e.g. Hebrew cantillation marks
 *                      5 differentiates between all strings whose canonically decomposed forms (NFD - Normalization Form D) are non-identical
 * @return              An INTEGER4 comparison result; 0 when the strings compare equal.
 * @see                 Std.Uni.LocaleCompareIgnoreCase
 */

EXPORT INTEGER4 LocaleCompareAtStrength(UNICODE src1, UNICODE src2, VARSTRING locale_name, INTEGER1 strength) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleCompareAtStrength(src1, src2, locale_name, strength);

/**
 * Reverses the order of the characters in the argument.
 * Note the argument is not TRIMMED before it is reversed.
 *
 * @param src           The string that is being reversed.
 * @return              src with its characters in reverse order.
 */

EXPORT UNICODE Reverse(UNICODE src) :=
  lib_unicodelib.UnicodeLib.UnicodeReverse(src);

/**
 * Substitutes the replacement string for every instance of the search string in the source.
 *
 * @param src           The string that is being transformed.
 * @param sought        The string to be replaced.
 * @param replacement   The string to be substituted into the result.
 * @return              src with each occurrence of sought replaced.
 */

EXPORT UNICODE FindReplace(UNICODE src, UNICODE sought, UNICODE replacement) :=
  lib_unicodelib.UnicodeLib.UnicodeFindReplace(src, sought, replacement);

/**
 * Substitutes the replacement string for every instance of the search string in the source,
 * matching according to the rules of a particular locale.
 *
 * @param src           The string that is being transformed.
 * @param sought        The string to be replaced.
 * @param replacement   The string to be substituted into the result.
 * @param locale_name   The locale to use for the comparison.
 * @return              src with each occurrence of sought replaced.
 */

EXPORT UNICODE LocaleFindReplace(UNICODE src, UNICODE sought, UNICODE replacement, VARSTRING locale_name) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleFindReplace(src, sought, replacement, locale_name);

/**
 * Substitutes the replacement string for every instance of the search string in the source,
 * matching according to the rules of a particular locale at the given comparison strength.
 *
 * @param src           The string that is being transformed.
 * @param sought        The string to be replaced.
 * @param replacement   The string to be substituted into the result.
 * @param locale_name   The locale to use for the comparison.
 * @param strength      The strength of the comparison (see Std.Uni.LocaleCompareAtStrength).
 * @return              src with each occurrence of sought replaced.
 */

EXPORT UNICODE LocaleFindAtStrengthReplace(UNICODE src, UNICODE sought, UNICODE replacement, VARSTRING locale_name, INTEGER1 strength) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleFindAtStrengthReplace(src, sought, replacement, locale_name, strength);

/**
 * Replaces every accented character in the source with its unaccented counterpart.
 *
 * @param src           The string that is being transformed.
 * @return              src with accents removed.
 */

EXPORT UNICODE CleanAccents(UNICODE src) :=
  lib_unicodelib.UnicodeLib.UnicodeCleanAccents(src);

/**
 * Collapses every run of two or more adjacent space characters into a single space.
 * Leading and trailing spaces are removed, and tab characters are converted to spaces.
 *
 * @param src           The string to be cleaned.
 * @return              The cleaned string.
 */

EXPORT UNICODE CleanSpaces(UNICODE src) :=
  lib_unicodelib.UnicodeLib.UnicodeCleanSpaces(src);

/**
 * Tests whether the source string matches a wildcard pattern.
 * The pattern can contain the wildcards '?' (single character) and '*' (multiple characters).
 *
 * @param src           The string that is being tested.
 * @param _pattern      The pattern to match against.
 * @param _noCase       Whether to ignore differences in case between characters.
 * @return              TRUE if src matches the pattern.
 */

EXPORT BOOLEAN WildMatch(UNICODE src, UNICODE _pattern, BOOLEAN _noCase) :=
  lib_unicodelib.UnicodeLib.UnicodeWildMatch(src, _pattern, _noCase);

/**
 * Tests whether the source string contains each of the characters in the pattern.
 * If the pattern contains duplicate characters, those characters must match once for each
 * occurrence in the pattern.
 *
 * @param src           The string that is being tested.
 * @param _pattern      The characters that must all be present in src.
 * @param _noCase       Whether to ignore differences in case between characters.
 * @return              TRUE if every pattern character is found in src.
 */

EXPORT BOOLEAN Contains(UNICODE src, UNICODE _pattern, BOOLEAN _noCase) :=
  lib_unicodelib.UnicodeLib.UnicodeContains(src, _pattern, _noCase);

/**
 * Computes the minimum edit distance between the two strings.  An insert, change or delete
 * counts as a single edit.
 * The two strings are trimmed before comparing.
 *
 * @param _left         The first string to be compared.
 * @param _right        The second string to be compared.
 * @param localename    The locale to use for the comparison.  Defaults to ''.
 * @param radius        The maximum edit distance that is acceptable, or 0 for no limit.  Defaults to 0.
 * @return              The minimum edit distance between the two strings.  Edit distances above radius will
 *                      return an arbitrary value larger than radius.
 */

EXPORT UNSIGNED4 EditDistance(UNICODE _left, UNICODE _right, VARSTRING localename = '', UNSIGNED4 radius = 0) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleEditDistanceV2(_left, _right, localename, radius);

/**
 * Tests whether the minimum edit distance between the two strings is within a specific radius.
 * The two strings are trimmed before comparing.
 *
 * @param _left         The first string to be compared.
 * @param _right        The second string to be compared.
 * @param radius        The maximum edit distance that is acceptable.
 * @param localename    The locale to use for the comparison.  Defaults to ''.
 * @return              Whether or not the two strings are within the specified edit distance.
 */

EXPORT BOOLEAN EditDistanceWithinRadius(UNICODE _left, UNICODE _right, UNSIGNED4 radius, VARSTRING localename = '') :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleEditDistanceWithinRadius(_left, _right, radius, localename);

/**
 * Counts the words in the string.  Word boundaries are marked by the unicode break semantics.
 *
 * @param text          The string to be broken into words.
 * @param localename    The locale to use for the break semantics.  Defaults to ''.
 * @return              The number of words in the string.
 */

EXPORT UNSIGNED4 WordCount(UNICODE text, VARSTRING localename = '') :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleWordCount(text, localename);

/**
 * Extracts the n-th word from the string.  Word boundaries are marked by the unicode break semantics.
 *
 * @param text          The string to be broken into words.
 * @param n             Which word should be returned from the function.
 * @param localename    The locale to use for the break semantics.  Defaults to ''.
 * @return              The n-th word of the string.
 */

EXPORT UNICODE GetNthWord(UNICODE text, UNSIGNED4 n, VARSTRING localename = '') :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleGetNthWord(text, n, localename);

/**
 * Returns everything but the string's nth word and some whitespace.  Words are marked by the unicode break semantics.
 * Trailing whitespace is always removed with the word.
 * Leading whitespace is only removed with the word if the nth word is the first word.
 * Returns a blank string if there are no words in the source string.
 * Returns the source string if the number of words in the string is less than the n parameter's assigned value.
 *
 * @param text          The string to be broken into words.
 * @param n             Which word should be removed from the string.
 * @param localename    The locale to use for the break semantics.  Defaults to ''.
 * @return              The string excluding the nth word.
 */

// Return type is declared explicitly, matching the other exports in this module.
EXPORT UNICODE ExcludeNthWord(UNICODE text, UNSIGNED4 n, VARSTRING localename = '') :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleExcludeNthWord(text, n, localename);

/**
 * Returns everything except the first word from the string.  Words are marked by the unicode break semantics.
 * Whitespace before and after the first word is also removed.
 *
 * @param text          The string to be broken into words.
 * @param localename    The locale to use for the break semantics.  Defaults to ''.
 * @return              The string excluding the first word.
 */

// Implemented as exclusion of word 1; the return type is declared explicitly, matching
// the other exports in this module.
EXPORT UNICODE ExcludeFirstWord(UNICODE text, VARSTRING localename = '') :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleExcludeNthWord(text, 1, localename);

/**
 * Drops the last word from the string and returns the remainder.  Word boundaries are marked by
 * the unicode break semantics.
 * Whitespace after a word is removed with the word and leading whitespace is removed with the first word.
 *
 * @param text          The string to be broken into words.
 * @param localename    The locale to use for the break semantics.  Defaults to ''.
 * @return              The string excluding the last word.
 */

EXPORT UNICODE ExcludeLastWord(UNICODE text, VARSTRING localename = '') :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleExcludeLastWord(text, localename);

/**
 * Returns the source string with all characters that match characters in the search string replaced
 * with the character at the corresponding position in the replacement string.
 * Per the original notes, the underlying implementation begins with isEmpty() tests that check for
 * invalid sequences in addition to blank strings; if any of them is true, the source string is returned.
 *
 * @param text          The string that is being translated.
 * @param search        The string containing the set of characters to be replaced.
 * @param replacement   The string containing the characters to act as replacements.
 * @return              The string containing the source string but with the translated characters.
 */

// Return type is declared explicitly, matching the other exports in this module.
EXPORT UNICODE Translate(UNICODE text, UNICODE search, UNICODE replacement) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleTranslate(text, search, replacement);

/**
 * Tests whether the prefix string matches the leading characters of the source string.  Trailing and
 * leading spaces are stripped from the prefix before matching.  Unless specified, normalization will
 * not occur.  Unless initiated as hex and then converted to Unicode using TRANSFER, ECL will perform
 * its own normalization on a declared Unicode string.
 *
 * @param src           The string being searched in.
 * @param prefix        The prefix to search for.
 * @param form          The type of normalization to be employed.
 * @return              TRUE if src begins with prefix.
 */

EXPORT BOOLEAN StartsWith(UNICODE src, UNICODE prefix, STRING form) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleStartsWith(src, prefix, form);

/**
 * Tests whether the suffix string matches the trailing characters of the source string.  Trailing and
 * leading spaces are stripped from the suffix before matching.  Unless specified, normalization will
 * not occur.  Unless initiated as hex and then converted to Unicode using TRANSFER, ECL will perform
 * its own normalization on a declared Unicode string.
 *
 * @param src           The string being searched in.
 * @param suffix        The suffix to search for.
 * @param form          The type of normalization to be employed.
 * @return              TRUE if src ends with suffix.
 */

EXPORT BOOLEAN EndsWith(UNICODE src, UNICODE suffix, STRING form) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleEndsWith(src, suffix, form);

/**
 * Reports the version of ICU being used to implement the unicode library.
 *
 * @return              A string containing the ICU version.
 */

EXPORT STRING Version() :=
  lib_unicodelib.UnicodeLib.UnicodeVersion();

/**
 * Removes the suffix from the search string, if present, and returns the result.  Trailing spaces are
 * stripped from both strings before matching.
 *
 * @param src           The string being searched in.
 * @param suffix        The suffix to search for.
 * @param form          The type of normalization to be employed.
 * @return              The string excluding the suffix, if src ends with it.
 * @see                 Std.Uni.EndsWith
 */

// Return type is declared explicitly, matching the other exports in this module.
EXPORT UNICODE RemoveSuffix(UNICODE src, UNICODE suffix, STRING form) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleRemoveSuffix(src, suffix, form);

/**
 * Returns a string containing text repeated n times.
 *
 * @param text          The string to be repeated.
 * @param n             Number of repetitions.
 * @return              A string containing n concatenations of the string text.
 */

// Return type is declared explicitly, matching the other exports in this module.
EXPORT UNICODE Repeat(UNICODE text, UNSIGNED4 n) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleRepeat(text, n);

/**
 * Counts the occurrences of the second string within the first string.
 *
 * @param src           The string that is searched.
 * @param sought        The string being sought.
 * @param form          The normalization form to apply.
 * @return              The number of occurrences (matches).
 */

EXPORT UNSIGNED4 FindCount(UNICODE src, UNICODE sought, STRING form) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleFindCount(src, sought, form);

/**
 * Counts the words that the string contains.  Words are separated by one or more separator strings.
 * No spaces are stripped from either string before matching.
 *
 * @param src               The string being searched in.
 * @param separator         The string used to separate words.
 * @param allow_blank       Indicates if empty/blank string items are included in the results.
 *                          Defaults to FALSE.
 * @return                  The number of delimited tokens in the source string.
 */

EXPORT UNSIGNED4 CountWords(UNICODE src, UNICODE separator, BOOLEAN allow_blank = FALSE) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleCountWords(src, separator, allow_blank);

/**
 * Returns the delimited words that the string contains as a set of unicode strings.  Words are
 * separated by one or more separator strings.  No spaces are stripped from either string before matching.
 *
 * @param src               The string being searched in.
 * @param separator         The string used to separate words.
 * @param allow_blank       Indicates if empty/blank string items are included in the results.
 *                          Defaults to FALSE.
 * @return                  A set whose members are the delimited words.
 */

// Return type is declared explicitly, matching the other exports in this module.
// NOTE(review): SET OF UNICODE follows the documented "UnicodeSet" result - confirm it
// matches the plugin's declared return type.
EXPORT SET OF UNICODE SplitWords(UNICODE src, UNICODE separator, BOOLEAN allow_blank = FALSE) :=
  lib_unicodelib.UnicodeLib.UnicodeLocaleSplitWords(src, separator, allow_blank);

END;