You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
107 lines
2.8 KiB
CoffeeScript
107 lines
2.8 KiB
CoffeeScript
UnicodeTrie = require 'unicode-trie'
|
|
data = require './data.json'
|
|
fs = require 'fs'
|
|
# Read the serialized trie stored next to this module and wrap it in a
# UnicodeTrie; every property lookup below goes through `trie.get`.
trieBytes = fs.readFileSync "#{__dirname}/data.trie"
trie = new UnicodeTrie trieBytes
|
|
|
|
# Base-2 logarithm. Uses the runtime's Math.log2 when it exists; otherwise
# falls back to dividing the natural logarithm by ln 2.
log2 =
  if Math.log2
    Math.log2
  else
    (value) -> Math.log(value) / Math.LN2
|
|
|
|
# Number of bits needed to represent the non-negative integer `n`, i.e. its
# bit length: bits(0) is 0, bits(1) is 1, bits(255) is 8, bits(256) is 9.
# Zero, negative, NaN and infinite inputs yield 0.
#
# The count is done by exact integer halving instead of truncating
# `log2(n) + 1`: a logarithm that rounds to just below an exact power of two
# would truncate to one bit too few, and the fields sized with this function
# would then overlap inside the packed value.
bits = (n) ->
  return 0 unless n > 0 and isFinite(n)

  width = 0
  remaining = Math.floor(n)
  while remaining >= 1
    width++
    remaining = Math.floor(remaining / 2)
  width
|
|
|
|
# Width, in bits, of each field packed into a trie value. A table-backed
# field is sized to hold the largest index into its lookup table; the
# numeric-value field has a fixed width.
widthForTable = (table) -> bits(table.length - 1)

CATEGORY_BITS = widthForTable data.categories
COMBINING_BITS = widthForTable data.combiningClasses
SCRIPT_BITS = widthForTable data.scripts
EAW_BITS = widthForTable data.eaw
NUMBER_BITS = 10
|
|
|
|
# Bit offset of each field inside a packed trie value. The number field
# occupies the lowest bits; each field above it starts where the previous
# one ends, so a shift is the combined width of all lower fields.
EAW_SHIFT = NUMBER_BITS
SCRIPT_SHIFT = EAW_SHIFT + EAW_BITS
COMBINING_SHIFT = SCRIPT_SHIFT + SCRIPT_BITS
CATEGORY_SHIFT = COMBINING_SHIFT + COMBINING_BITS

# Mask with the low `width` bits set; applied after a field is shifted down.
lowBitsMask = (width) -> (1 << width) - 1

CATEGORY_MASK = lowBitsMask CATEGORY_BITS
COMBINING_MASK = lowBitsMask COMBINING_BITS
SCRIPT_MASK = lowBitsMask SCRIPT_BITS
EAW_MASK = lowBitsMask EAW_BITS
NUMBER_MASK = lowBitsMask NUMBER_BITS
|
|
|
|
# Returns the entry of `data.categories` selected by the category field of
# the trie value stored for `codePoint`.
exports.getCategory = (codePoint) ->
  packed = trie.get(codePoint)
  categoryIndex = (packed >> CATEGORY_SHIFT) & CATEGORY_MASK
  data.categories[categoryIndex]
|
|
|
|
# Returns the entry of `data.combiningClasses` selected by the combining-class
# field of the trie value stored for `codePoint`.
exports.getCombiningClass = (codePoint) ->
  packed = trie.get(codePoint)
  classIndex = (packed >> COMBINING_SHIFT) & COMBINING_MASK
  data.combiningClasses[classIndex]
|
|
|
|
# Returns the entry of `data.scripts` selected by the script field of the
# trie value stored for `codePoint`.
exports.getScript = (codePoint) ->
  packed = trie.get(codePoint)
  scriptIndex = (packed >> SCRIPT_SHIFT) & SCRIPT_MASK
  data.scripts[scriptIndex]
|
|
|
|
# Returns the entry of `data.eaw` selected by the East Asian width field of
# the trie value stored for `codePoint`.
exports.getEastAsianWidth = (codePoint) ->
  packed = trie.get(codePoint)
  widthIndex = (packed >> EAW_SHIFT) & EAW_MASK
  data.eaw[widthIndex]
|
|
|
|
# Decodes the numeric value stored for `codePoint`, or returns null when the
# code point has none.
#
# The low NUMBER_BITS bits of the trie value hold a packed number whose
# layout depends on its magnitude:
#   0             -> no numeric value (null)
#   1 .. 50       -> small integer, stored as value + 1
#   51 .. 0x1df   -> fraction: numerator above the low 4 bits, denominator in them
#   0x1e0 .. 0x2ff -> mantissa multiplied by a power of ten
#   0x300 and up  -> mantissa multiplied by a power of sixty
exports.getNumericValue = (codePoint) ->
  packed = trie.get(codePoint) & NUMBER_MASK

  # Multiplies `mantissa` by `base`, `exponent` times in a row. Done as
  # repeated multiplication so the floating-point result is built up one
  # step at a time.
  scaleBy = (mantissa, base, exponent) ->
    product = mantissa
    remaining = exponent
    while remaining > 0
      product *= base
      remaining--
    product

  return null if packed is 0
  return packed - 1 if packed <= 50

  if packed < 0x1e0
    # fraction
    top = (packed >> 4) - 12
    bottom = (packed & 0xf) + 1
    return top / bottom

  if packed < 0x300
    # base 10
    return scaleBy((packed >> 5) - 14, 10, (packed & 0x1f) + 2)

  # base 60
  scaleBy((packed >> 2) - 0xbf, 60, (packed & 3) + 1)
|
|
|
|
# True when getCategory reports one of the letter categories
# (Lu, Ll, Lt, Lm, Lo) or Nl for `codePoint`.
exports.isAlphabetic = (codePoint) ->
  category = exports.getCategory codePoint
  ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'].indexOf(category) isnt -1
|
|
|
|
# True when getCategory reports 'Nd' for `codePoint`.
exports.isDigit = (codePoint) ->
  category = exports.getCategory codePoint
  category is 'Nd'
|
|
|
|
# True when getCategory reports one of the punctuation categories
# (Pc, Pd, Pe, Pf, Pi, Po, Ps) for `codePoint`.
exports.isPunctuation = (codePoint) ->
  category = exports.getCategory codePoint
  ['Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps'].indexOf(category) isnt -1
|
|
|
|
# True when getCategory reports 'Ll' for `codePoint`.
exports.isLowerCase = (codePoint) ->
  category = exports.getCategory codePoint
  category is 'Ll'
|
|
|
|
# True when getCategory reports 'Lu' for `codePoint`.
exports.isUpperCase = (codePoint) ->
  category = exports.getCategory codePoint
  category is 'Lu'
|
|
|
|
# True when getCategory reports 'Lt' for `codePoint`.
exports.isTitleCase = (codePoint) ->
  category = exports.getCategory codePoint
  category is 'Lt'
|
|
|
|
# True when getCategory reports one of the separator categories
# (Zs, Zl, Zp) for `codePoint`.
exports.isWhiteSpace = (codePoint) ->
  category = exports.getCategory codePoint
  ['Zs', 'Zl', 'Zp'].indexOf(category) isnt -1
|
|
|
|
# True when getCategory reports a number category (Nd, No, Nl), a letter
# category (Lu, Ll, Lt, Lm, Lo), or one of the mark categories Me / Mc
# for `codePoint`.
exports.isBaseForm = (codePoint) ->
  category = exports.getCategory codePoint
  baseCategories = ['Nd', 'No', 'Nl', 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Me', 'Mc']
  baseCategories.indexOf(category) isnt -1
|
|
|
|
# True when getCategory reports one of the mark categories
# (Mn, Me, Mc) for `codePoint`.
exports.isMark = (codePoint) ->
  category = exports.getCategory codePoint
  ['Mn', 'Me', 'Mc'].indexOf(category) isnt -1
|