123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536 |
- /*##############################################################################
- ## HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®. All rights reserved.
- ############################################################################## */
- IMPORT lib_unicodelib;
- EXPORT Uni := MODULE
- /**
- * Returns the first string with all characters within the second string removed.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeFilterOut.
- *
- * @param src The string that is being filtered.
- * @param filter The string containing the set of characters to be excluded.
- * @return The source string without the characters that appear in filter.
- * @see Std.Uni.Filter
- */
-
- EXPORT unicode FilterOut(unicode src, unicode filter) :=
- lib_unicodelib.UnicodeLib.UnicodeFilterOut(src, filter);
- /**
- * Returns the first string with all characters not within the second string removed.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeFilter.
- *
- * @param src The string that is being filtered.
- * @param filter The string containing the set of characters to be included.
- * @return The source string restricted to the characters that appear in filter.
- * @see Std.Uni.FilterOut
- */
-
- EXPORT unicode Filter(unicode src, unicode filter) :=
- lib_unicodelib.UnicodeLib.UnicodeFilter(src, filter);
- /**
- * Returns the source string with the replacement character substituted for all characters included in the
- * filter string.
- * MORE: Should this be a general string substitution?
- * NOTE(review): this delegates to the library routine UnicodeSubstituteOut, while SubstituteExcluded
- * delegates to UnicodeSubstitute. Presumably the "Out" routine is the one that replaces characters found in
- * the filter - confirm against lib_unicodelib before changing either call.
- *
- * @param src The string that is being transformed.
- * @param filter The string containing the set of characters to be replaced.
- * @param replace_char The character to be substituted into the result.
- * @return The source string after the substitution.
- * @see Std.Uni.SubstituteExcluded
- */
- EXPORT unicode SubstituteIncluded(unicode src, unicode filter, unicode replace_char) :=
- lib_unicodelib.UnicodeLib.UnicodeSubstituteOut(src, filter, replace_char);
- /**
- * Returns the source string with the replacement character substituted for all characters not included in the
- * filter string.
- * MORE: Should this be a general string substitution?
- * NOTE(review): this delegates to the library routine UnicodeSubstitute, while SubstituteIncluded delegates
- * to UnicodeSubstituteOut. Presumably UnicodeSubstitute is the routine that replaces characters absent from
- * the filter - confirm against lib_unicodelib before changing either call.
- *
- * @param src The string that is being transformed.
- * @param filter The string containing the set of characters to be left unchanged.
- * @param replace_char The character to be substituted into the result.
- * @return The source string after the substitution.
- * @see Std.Uni.SubstituteIncluded
- */
- EXPORT unicode SubstituteExcluded(unicode src, unicode filter, unicode replace_char) :=
- lib_unicodelib.UnicodeLib.UnicodeSubstitute(src, filter, replace_char);
- /**
- * Returns the character position of the nth match of the search string with the first string.
- * If no match is found the attribute returns 0.
- * If instance is omitted the position of the first instance is returned (instance defaults to 1).
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeFind.
- *
- * @param src The string that is searched
- * @param sought The string being sought.
- * @param instance Which match instance are we interested in? Defaults to 1.
- * @return The character position of the requested match, or 0 if there is no such match.
- */
-
- EXPORT UNSIGNED4 Find(unicode src, unicode sought, unsigned4 instance = 1) :=
- lib_unicodelib.UnicodeLib.UnicodeFind(src, sought, instance);
- /**
- * Tests whether the supplied word occurs in the source string as a whole word, i.e. delimited on both
- * sides by a regular-expression word boundary.
- * Note that word is concatenated into the search pattern as-is, so any regular-expression metacharacters
- * it contains are treated as pattern syntax rather than as literal text.
- *
- * @param src The string that is being tested.
- * @param word The word to be searched for.
- * @param ignore_case Whether to ignore differences in case between characters. Defaults to FALSE.
- * @return TRUE if the word is found as a whole word within src.
- */
- EXPORT BOOLEAN FindWord(UNICODE src, UNICODE word, BOOLEAN ignore_case=FALSE) := FUNCTION
- // The word wrapped in \b anchors; built once and shared by both matching modes.
- wholeWordPattern := u'\\b'+word+u'\\b';
- matchedIgnoringCase := REGEXFIND(wholeWordPattern, src, NOCASE);
- matchedExactCase := REGEXFIND(wholeWordPattern, src);
- RETURN IF (ignore_case, matchedIgnoringCase, matchedExactCase);
- END;
- /**
- * Returns the character position of the nth match of the search string with the first string.
- * If no match is found the attribute returns 0.
- * Note that instance has no default value in this signature, so it must always be supplied.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleFind.
- *
- * @param src The string that is searched
- * @param sought The string being sought.
- * @param instance Which match instance are we interested in?
- * @param locale_name The locale to use for the comparison
- * @return The character position of the requested match, or 0 if there is no such match.
- */
-
- EXPORT UNSIGNED4 LocaleFind(unicode src, unicode sought, unsigned4 instance, varstring locale_name) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleFind(src, sought, instance, locale_name);
- /**
- * Returns the character position of the nth match of the search string with the first string.
- * If no match is found the attribute returns 0.
- * Note that instance has no default value in this signature, so it must always be supplied.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleFindAtStrength.
- *
- * @param src The string that is searched
- * @param tofind The string being sought.
- * @param instance Which match instance are we interested in?
- * @param locale_name The locale to use for the comparison
- * @param strength The strength of the comparison
- * 1 ignores accents and case, differentiating only between letters
- * 2 ignores case but differentiates between accents.
- * 3 differentiates between accents and case but ignores e.g. differences between Hiragana and Katakana
- * 4 differentiates between accents and case and e.g. Hiragana/Katakana, but ignores e.g. Hebrew cantillation marks
- * 5 differentiates between all strings whose canonically decomposed forms (NFD - Normalization Form D) are non-identical
- * @return The character position of the requested match, or 0 if there is no such match.
- */
-
- EXPORT UNSIGNED4 LocaleFindAtStrength(unicode src, unicode tofind, unsigned4 instance, varstring locale_name, integer1 strength) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleFindAtStrength(src, tofind, instance, locale_name, strength);
- /**
- * Returns the nth element from a comma separated string.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeExtract.
- *
- * @param src The string containing the comma separated list.
- * @param instance Which item to select from the list.
- * @return The selected item from the list.
- */
- EXPORT unicode Extract(unicode src, unsigned4 instance) :=
- lib_unicodelib.UnicodeLib.UnicodeExtract(src, instance);
- /**
- * Returns the argument string with all upper case characters converted to lower case.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeToLowerCase.
- *
- * @param src The string that is being converted.
- * @return The lower case variant of the string.
- * @see Std.Uni.LocaleToLowerCase
- */
- EXPORT unicode ToLowerCase(unicode src) :=
- lib_unicodelib.UnicodeLib.UnicodeToLowerCase(src);
- /**
- * Returns the argument string with all lower case characters converted to upper case.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeToUpperCase.
- *
- * @param src The string that is being converted.
- * @return The upper case variant of the string.
- * @see Std.Uni.LocaleToUpperCase
- */
- EXPORT unicode ToUpperCase(unicode src) :=
- lib_unicodelib.UnicodeLib.UnicodeToUpperCase(src);
- /**
- * Returns the title case (proper case) variant of the string.
- * This variant takes no locale argument; delegates to lib_unicodelib.UnicodeLib.UnicodeToProperCase.
- *
- * @param src The string that is being converted.
- * @return The title case variant of the string.
- * @see Std.Uni.LocaleToTitleCase
- */
- EXPORT unicode ToTitleCase(unicode src) :=
- lib_unicodelib.UnicodeLib.UnicodeToProperCase(src);
- /**
- * Returns the lower case variant of the string using the rules for a particular locale.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleToLowerCase.
- *
- * @param src The string that is being converted.
- * @param locale_name The locale whose rules are used for the conversion
- * @return The lower case variant of the string.
- */
- EXPORT unicode LocaleToLowerCase(unicode src, varstring locale_name) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleToLowerCase(src, locale_name);
- /**
- * Returns the upper case variant of the string using the rules for a particular locale.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleToUpperCase.
- *
- * @param src The string that is being converted.
- * @param locale_name The locale whose rules are used for the conversion
- * @return The upper case variant of the string.
- */
- EXPORT unicode LocaleToUpperCase(unicode src, varstring locale_name) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleToUpperCase(src, locale_name);
- /**
- * Returns the title case (proper case) variant of the string using the rules for a particular locale.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleToProperCase.
- *
- * @param src The string that is being converted.
- * @param locale_name The locale whose rules are used for the conversion
- * @return The title case variant of the string.
- */
- EXPORT unicode LocaleToTitleCase(unicode src, varstring locale_name) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleToProperCase(src, locale_name);
- /**
- * Compares the two strings case insensitively. Equivalent to comparing at strength 2.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeCompareIgnoreCase.
- *
- * @param src1 The first string to be compared.
- * @param src2 The second string to be compared.
- * @return The integer result of the comparison, as produced by the library routine.
- * @see Std.Uni.CompareAtStrength
- */
-
- EXPORT integer4 CompareIgnoreCase(unicode src1, unicode src2) :=
- lib_unicodelib.UnicodeLib.UnicodeCompareIgnoreCase(src1, src2);
- /**
- * Compares the two strings at the requested comparison strength.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeCompareAtStrength.
- *
- * @param src1 The first string to be compared.
- * @param src2 The second string to be compared.
- * @param strength The strength of the comparison
- * 1 ignores accents and case, differentiating only between letters
- * 2 ignores case but differentiates between accents.
- * 3 differentiates between accents and case but ignores e.g. differences between Hiragana and Katakana
- * 4 differentiates between accents and case and e.g. Hiragana/Katakana, but ignores e.g. Hebrew cantillation marks
- * 5 differentiates between all strings whose canonically decomposed forms (NFD - Normalization Form D) are non-identical
- * @return The integer result of the comparison, as produced by the library routine.
- * @see Std.Uni.CompareIgnoreCase
- */
-
- EXPORT integer4 CompareAtStrength(unicode src1, unicode src2, integer1 strength) :=
- lib_unicodelib.UnicodeLib.UnicodeCompareAtStrength(src1, src2, strength);
- /**
- * Compares the two strings case insensitively, using the supplied locale. Equivalent to comparing at strength 2.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleCompareIgnoreCase.
- *
- * @param src1 The first string to be compared.
- * @param src2 The second string to be compared.
- * @param locale_name The locale to use for the comparison
- * @return The integer result of the comparison, as produced by the library routine.
- * @see Std.Uni.LocaleCompareAtStrength
- */
-
- EXPORT integer4 LocaleCompareIgnoreCase(unicode src1, unicode src2, varstring locale_name) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleCompareIgnoreCase(src1, src2, locale_name);
- /**
- * Compares the two strings at the requested comparison strength, using the supplied locale.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleCompareAtStrength.
- *
- * @param src1 The first string to be compared.
- * @param src2 The second string to be compared.
- * @param locale_name The locale to use for the comparison
- * @param strength The strength of the comparison
- * 1 ignores accents and case, differentiating only between letters
- * 2 ignores case but differentiates between accents.
- * 3 differentiates between accents and case but ignores e.g. differences between Hiragana and Katakana
- * 4 differentiates between accents and case and e.g. Hiragana/Katakana, but ignores e.g. Hebrew cantillation marks
- * 5 differentiates between all strings whose canonically decomposed forms (NFD - Normalization Form D) are non-identical
- * @return The integer result of the comparison, as produced by the library routine.
- * @see Std.Uni.LocaleCompareIgnoreCase
- */
- EXPORT integer4 LocaleCompareAtStrength(unicode src1, unicode src2, varstring locale_name, integer1 strength) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleCompareAtStrength(src1, src2, locale_name, strength);
- /**
- * Returns the argument string with all characters in reverse order.
- * Note the argument is not TRIMMED before it is reversed.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeReverse.
- *
- * @param src The string that is being reversed.
- * @return The reversed string.
- */
- EXPORT unicode Reverse(unicode src) :=
- lib_unicodelib.UnicodeLib.UnicodeReverse(src);
- /**
- * Returns the source string with the replacement string substituted for all instances of the search string.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeFindReplace.
- *
- * @param src The string that is being transformed.
- * @param sought The string to be replaced.
- * @param replacement The string to be substituted into the result.
- * @return The source string after the replacements have been made.
- * @see Std.Uni.LocaleFindReplace
- */
- EXPORT unicode FindReplace(unicode src, unicode sought, unicode replacement) :=
- lib_unicodelib.UnicodeLib.UnicodeFindReplace(src, sought, replacement);
- /**
- * Returns the source string with the replacement string substituted for all instances of the search string,
- * where instances are located using the supplied locale.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleFindReplace.
- *
- * @param src The string that is being transformed.
- * @param sought The string to be replaced.
- * @param replacement The string to be substituted into the result.
- * @param locale_name The locale to use for the comparison
- * @return The source string after the replacements have been made.
- * @see Std.Uni.LocaleFindAtStrengthReplace
- */
- EXPORT unicode LocaleFindReplace(unicode src, unicode sought, unicode replacement, varstring locale_name) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleFindReplace(src, sought, replacement, locale_name);
- /**
- * Returns the source string with the replacement string substituted for all instances of the search string,
- * where instances are located using the supplied locale and comparison strength.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleFindAtStrengthReplace.
- *
- * @param src The string that is being transformed.
- * @param sought The string to be replaced.
- * @param replacement The string to be substituted into the result.
- * @param locale_name The locale to use for the comparison
- * @param strength The strength of the comparison (see Std.Uni.LocaleFindAtStrength for the levels)
- * @return The source string after the replacements have been made.
- */
- EXPORT unicode LocaleFindAtStrengthReplace(unicode src, unicode sought, unicode replacement, varstring locale_name, integer1 strength) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleFindAtStrengthReplace(src, sought, replacement, locale_name, strength);
- /**
- * Returns the source string with all accented characters replaced with unaccented.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeCleanAccents.
- *
- * @param src The string that is being transformed.
- * @return The source string with accents removed.
- */
- EXPORT unicode CleanAccents(unicode src) :=
- lib_unicodelib.UnicodeLib.UnicodeCleanAccents(src);
- /**
- * Returns the source string with all instances of multiple adjacent space characters (2 or more spaces together)
- * reduced to a single space character. Leading and trailing spaces are removed, and tab characters are converted
- * to spaces.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeCleanSpaces.
- *
- * @param src The string to be cleaned.
- * @return The cleaned string.
- */
- EXPORT unicode CleanSpaces(unicode src) :=
- lib_unicodelib.UnicodeLib.UnicodeCleanSpaces(src);
- /**
- * Tests if the search string matches the pattern.
- * The pattern can contain wildcards '?' (single character) and '*' (multiple character).
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeWildMatch.
- *
- * @param src The string that is being tested.
- * @param _pattern The pattern to match against.
- * @param _noCase Whether to ignore differences in case between characters
- * @return TRUE if the string matches the pattern.
- */
-
- EXPORT boolean WildMatch(unicode src, unicode _pattern, boolean _noCase) :=
- lib_unicodelib.UnicodeLib.UnicodeWildMatch(src, _pattern, _noCase);
- /**
- * Tests if the search string contains each of the characters in the pattern.
- * If the pattern contains duplicate characters those characters will match once for each occurrence in the pattern.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeContains.
- *
- * @param src The string that is being tested.
- * @param _pattern The pattern to match against.
- * @param _noCase Whether to ignore differences in case between characters
- * @return TRUE if every character of the pattern is found in the string.
- */
-
- EXPORT BOOLEAN Contains(unicode src, unicode _pattern, boolean _noCase) :=
- lib_unicodelib.UnicodeLib.UnicodeContains(src, _pattern, _noCase);
- /**
- * Returns the minimum edit distance between the two strings. An insert, change, or delete counts as a single edit.
- * The two strings are trimmed before comparing.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleEditDistanceV2.
- *
- * @param _left The first string to be compared.
- * @param _right The second string to be compared.
- * @param localename The locale to use for the comparison. Defaults to ''.
- * @param radius The maximum edit distance that is acceptable, or 0 for no limit. Defaults to 0.
- * @return The minimum edit distance between the two strings. Edit distances above radius will
- * return an arbitrary value larger than radius.
- */
- EXPORT UNSIGNED4 EditDistance(unicode _left, unicode _right, varstring localename = '', UNSIGNED4 radius = 0) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleEditDistanceV2(_left, _right, localename, radius);
- /**
- * Returns true if the minimum edit distance between the two strings is within a specific range.
- * The two strings are trimmed before comparing.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleEditDistanceWithinRadius.
- *
- * @param _left The first string to be compared.
- * @param _right The second string to be compared.
- * @param radius The maximum edit distance that is acceptable.
- * @param localename The locale to use for the comparison. Defaults to ''.
- * @return Whether or not the two strings are within the given specified edit distance.
- */
- EXPORT BOOLEAN EditDistanceWithinRadius(unicode _left, unicode _right, unsigned4 radius, varstring localename = '') :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleEditDistanceWithinRadius(_left, _right, radius, localename);
- /**
- * Returns the number of words in the string. Word boundaries are marked by the unicode break semantics.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleWordCount.
- *
- * @param text The string to be broken into words.
- * @param localename The locale to use for the break semantics. Defaults to ''.
- * @return The number of words in the string.
- */
- EXPORT unsigned4 WordCount(unicode text, varstring localename = '') :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleWordCount(text, localename);
- /**
- * Returns the n-th word from the string. Word boundaries are marked by the unicode break semantics.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleGetNthWord.
- *
- * @param text The string to be broken into words.
- * @param n Which word should be returned from the function.
- * @param localename The locale to use for the break semantics. Defaults to ''.
- * @return The n-th word of the string.
- */
- EXPORT unicode GetNthWord(unicode text, unsigned4 n, varstring localename = '') :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleGetNthWord(text, n, localename);
- /**
- * Returns everything but the string's nth word and some whitespace. Words are marked by the unicode break semantics.
- * Trailing whitespace is always removed with the word.
- * Leading whitespace is only removed with the word if the nth word is the first word.
- * Returns a blank string if there are no words in the source string.
- * Returns the source string if the number of words in the string is less than the n parameter's assigned value.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleExcludeNthWord.
- *
- * @param text The string to be broken into words.
- * @param n Which word should be removed from the string.
- * @param localename The locale to use for the break semantics. Defaults to ''.
- * @return The string excluding the nth word.
- */
- EXPORT unicode ExcludeNthWord(unicode text, unsigned4 n, varstring localename = '') :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleExcludeNthWord(text, n, localename);
- /**
- * Returns everything except the first word from the string. Words are marked by the unicode break semantics.
- * Whitespace before and after the first word is also removed.
- * Implemented by asking lib_unicodelib.UnicodeLib.UnicodeLocaleExcludeNthWord to drop word number 1.
- *
- * @param text The string to be broken into words.
- * @param localename The locale to use for the break semantics. Defaults to ''.
- * @return The string excluding the first word.
- */
- EXPORT unicode ExcludeFirstWord(unicode text, varstring localename = '') :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleExcludeNthWord(text, 1, localename);
- /**
- * Returns everything except the last word from the string. Word boundaries are marked by the unicode break semantics.
- * Whitespace after a word is removed with the word and leading whitespace is removed with the first word.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleExcludeLastWord.
- *
- * @param text The string to be broken into words.
- * @param localename The locale to use for the break semantics. Defaults to ''.
- * @return The string excluding the last word.
- */
- EXPORT unicode ExcludeLastWord(unicode text, varstring localename = '') :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleExcludeLastWord(text, localename);
- /**
- * Returns the source string with all the characters that match characters in the search string replaced
- * with the character at the corresponding position in the replacement string.
- * According to the original notes, the underlying library routine starts with isEmpty() tests that also catch
- * invalid sequences as well as blank strings; if any of those tests is true the source string is returned.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleTranslate.
- *
- * @param text The string that is being translated.
- * @param search The string containing the set of characters to be replaced.
- * @param replacement The string containing the characters to act as replacements.
- * @return The string containing the source string but with the translated characters.
- */
- EXPORT unicode Translate(unicode text, unicode search, unicode replacement) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleTranslate(text, search, replacement);
- /**
- * Returns true if the prefix string matches the leading characters in the source string. Trailing and leading spaces
- * are stripped from the prefix before matching. Unless specified, normalization will not occur. Unless initiated as hex and
- * then converted to Unicode using TRANSFER, ECL will perform its own normalization on your declared Unicode string.
- * NOTE(review): form has no default value in this signature, so callers must always pass it; presumably a
- * blank string means "no normalization" - confirm against lib_unicodelib.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleStartsWith.
- *
- * @param src The string being searched in.
- * @param prefix The prefix to search for.
- * @param form The type of Normalization to be employed.
- * @return TRUE if src starts with the prefix.
- */
- EXPORT BOOLEAN StartsWith(unicode src, unicode prefix, string form) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleStartsWith(src, prefix, form);
- /**
- * Returns true if the suffix string matches the trailing characters in the source string. Trailing and leading spaces
- * are stripped from the suffix before matching. Unless specified, normalization will not occur. Unless initiated as hex and
- * then converted to Unicode using TRANSFER, ECL will perform its own normalization on your declared Unicode string.
- * NOTE(review): form has no default value in this signature, so callers must always pass it; presumably a
- * blank string means "no normalization" - confirm against lib_unicodelib.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleEndsWith.
- *
- * @param src The string being searched in.
- * @param suffix The suffix to search for.
- * @param form The type of Normalization to be employed.
- * @return TRUE if src ends with the suffix.
- */
- EXPORT BOOLEAN EndsWith(unicode src, unicode suffix, string form) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleEndsWith(src, suffix, form);
- /**
- * Returns a string containing the version of ICU being used to implement the unicode library.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeVersion.
- *
- * @return The version string reported by the unicode library.
- */
- EXPORT STRING Version() := lib_unicodelib.UnicodeLib.UnicodeVersion();
- /**
- * Removes the suffix from the search string, if present, and returns the result. Trailing spaces are
- * stripped from both strings before matching.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleRemoveSuffix.
- *
- * @param src The string being searched in.
- * @param suffix The suffix to search for.
- * @param form The type of Normalization to be employed.
- * @return The string excluding the suffix, if src ends with the suffix.
- * @see Std.Uni.EndsWith
- */
- EXPORT unicode RemoveSuffix(unicode src, unicode suffix, string form) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleRemoveSuffix(src, suffix, form);
- /**
- * Returns a string containing text repeated n times.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleRepeat.
- *
- * @param text The string to be repeated.
- * @param n Number of repetitions.
- * @return A string containing n concatenations of the string text.
- */
- EXPORT unicode Repeat(unicode text, unsigned4 n) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleRepeat(text, n);
- /**
- * Returns the number of occurrences of the second string within the first string.
- * NOTE(review): form is described as optional, but it has no default value in this signature, so callers
- * must always pass it; presumably a blank string means "no normalization" - confirm against lib_unicodelib.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleFindCount.
- *
- * @param src The string that is searched.
- * @param sought The string being sought.
- * @param form The normalization form to apply.
- * @return The number of occurrences (matches).
- */
- EXPORT unsigned4 FindCount(unicode src, unicode sought, string form) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleFindCount(src, sought, form);
- /**
- * Returns the number of words that the string contains. Words are separated by one or more separator strings. No
- * spaces are stripped from either string before matching.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleCountWords.
- *
- * @param src The string being searched in.
- * @param separator The string used to separate words
- * @param allow_blank Indicates if empty/blank string items are included in the results. Defaults to FALSE.
- * @return The number of delimited tokens in the source string
- * @see Std.Uni.SplitWords
- */
- EXPORT unsigned4 CountWords(unicode src, unicode separator, boolean allow_blank = FALSE) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleCountWords(src, separator, allow_blank);
- /**
- * Returns the delimited words that the string contains in a UnicodeSet. Words are separated by one or more separator strings. No
- * spaces are stripped from either string before matching.
- * Delegates to lib_unicodelib.UnicodeLib.UnicodeLocaleSplitWords.
- * NOTE(review): no return type is declared here, so it is inferred from the library routine - presumably
- * SET OF UNICODE; confirm before declaring it explicitly.
- *
- * @param src The string being searched in.
- * @param separator The string used to separate words
- * @param allow_blank Indicates if empty/blank string items are included in the results. Defaults to FALSE.
- * @return A UnicodeSet whose members are the delimited words
- * @see Std.Uni.CountWords
- */
- EXPORT SplitWords(unicode src, unicode separator, boolean allow_blank = FALSE) :=
- lib_unicodelib.UnicodeLib.UnicodeLocaleSplitWords(src, separator, allow_blank);
- END;
|