Fix emojify() by generating a mapping to existing Twemoji files (#5080)
A new rake task, emojis:generate, downloads the full list of valid Unicode emoji sequences from unicode.org and checks it against the existing Twemoji files, then generates a map from each sequence to an existing file (needed because there can be multiple ways to express the same emoji). The map is dumped into app/javascript/mastodon/emoji_map.json. That file is loaded by emojione_light.js (now a misnomer), which decorates it further with shortcodes taken from emoji-mart's index.
This commit is contained in:
		
							
								
								
									
										1
									
								
								app/javascript/mastodon/emoji_map.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								app/javascript/mastodon/emoji_map.json
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@@ -1,13 +1,38 @@
 | 
			
		||||
// @preval
 | 
			
		||||
// Force tree shaking on emojione by exposing just a subset of its functionality
 | 
			
		||||
// http://www.unicode.org/Public/emoji/5.0/emoji-test.txt
 | 
			
		||||
 | 
			
		||||
const emojione = require('emojione');
 | 
			
		||||
const emojis         = require('./emoji_map.json');
 | 
			
		||||
const { emojiIndex } = require('emoji-mart');
 | 
			
		||||
const excluded       = ['®', '©', '™'];
 | 
			
		||||
const skins          = ['🏻', '🏼', '🏽', '🏾', '🏿'];
 | 
			
		||||
const shortcodeMap   = {};
 | 
			
		||||
 | 
			
		||||
const mappedUnicode = emojione.mapUnicodeToShort();
 | 
			
		||||
const excluded = ['®', '©', '™'];
 | 
			
		||||
// Index emoji-mart's entries by their native unicode string so that the
// entry id (used as the shortcode) can be looked up from the characters.
for (const name of Object.keys(emojiIndex.emojis)) {
  const { native, id } = emojiIndex.emojis[name];

  shortcodeMap[native] = id;
}
 | 
			
		||||
 | 
			
		||||
module.exports.unicodeMapping = Object.keys(emojione.jsEscapeMap)
 | 
			
		||||
  .filter(c => !excluded.includes(c))
 | 
			
		||||
  .map(unicodeStr => [unicodeStr, mappedUnicode[emojione.jsEscapeMap[unicodeStr]]])
 | 
			
		||||
  .map(([unicodeStr, shortCode]) => ({ [unicodeStr]: [emojione.emojioneList[shortCode].fname.replace(/^0+/g, ''), shortCode.slice(1, shortCode.length - 1)] }))
 | 
			
		||||
  .reduce((x, y) => Object.assign(x, y), { });
 | 
			
		||||
// Returns `unicode` with every skin-tone modifier listed in `skins` removed.
//
// String#replace with a string pattern only replaces the FIRST occurrence,
// so a sequence carrying the same modifier more than once would keep the
// later ones. split/join removes all occurrences without needing a regex.
const stripModifiers = unicode =>
  skins.reduce((stripped, tone) => stripped.split(tone).join(''), unicode);
 | 
			
		||||
 | 
			
		||||
// Rewrite each entry of `emojis` from `filename` to `[filename, shortcode]`,
// dropping the excluded characters entirely. Object.keys() takes a snapshot
// of the keys, so deleting entries while looping is safe.
for (const key of Object.keys(emojis)) {
  if (excluded.includes(key)) {
    delete emojis[key];
    continue;
  }

  // Look the shortcode up without skin-tone modifiers; if that fails, retry
  // with a trailing U+FE0F appended to the key.
  const normalizedKey = stripModifiers(key);
  const shortcode     = shortcodeMap[normalizedKey] || shortcodeMap[normalizedKey + '\uFE0F'];

  emojis[key] = [emojis[key], shortcode];
}
 | 
			
		||||
 | 
			
		||||
module.exports.unicodeMapping = emojis;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										56
									
								
								lib/tasks/emojis.rake
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								lib/tasks/emojis.rake
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,56 @@
 | 
			
		||||
# frozen_string_literal: true
 | 
			
		||||
 | 
			
		||||
# Turns a space-separated list of hex codepoints (e.g. "0023 FE0F 20E3")
# into a lowercase, dash-joined basename (e.g. "23-fe0f-20e3").
# Only zeros at the very start of the string are dropped.
def codepoints_to_filename(codepoints)
  lowered = codepoints.downcase
  trimmed = lowered.sub(/\A0+/, '')

  trimmed.tr(' ', '-')
end
 | 
			
		||||
 | 
			
		||||
# Converts hex codepoints into the UTF-8 string they encode.
# A value containing a space is treated as a space-separated sequence;
# anything else is parsed as one single codepoint.
def codepoints_to_unicode(codepoints)
  return [codepoints.hex].pack('U') unless codepoints.include?(' ')

  values = codepoints.split(' ').map(&:hex)
  values.pack('U*')
end
 | 
			
		||||
 | 
			
		||||
namespace :emojis do
  # Downloads the Unicode emoji test list, groups every fully-qualified
  # sequence with the other entries that follow it, and maps each variant's
  # unicode string to the filename of the first variant in its group that
  # has an SVG under public/emoji. The map is written as JSON to
  # app/javascript/mastodon/emoji_map.json.
  desc 'Generate a unicode to filename mapping'
  task :generate do
    source = 'http://www.unicode.org/Public/emoji/5.0/emoji-test.txt'
    codes  = []
    dest   = Rails.root.join('app', 'javascript', 'mastodon', 'emoji_map.json')

    puts "Downloading emojos from source... (#{source})"

    # Data lines look like "<codepoints> ; <status> # <comment>"; comment
    # lines and lines without a ';' separator are ignored.
    HTTP.get(source).to_s.split("\n").each do |line|
      next if line.start_with? '#'

      parts = line.split(';').map(&:strip)

      next if parts.size < 2

      codes << [parts[0], parts[1].start_with?('fully-qualified')]
    end

    # A fully-qualified entry opens a new group; any other entry is appended
    # to the group opened before it. If the list does not begin with a
    # fully-qualified entry there is no group to append to yet, so one is
    # started instead of calling << on nil.
    grouped_codes = codes.each_with_object([]) do |(codepoints, fully_qualified), groups|
      if fully_qualified || groups.empty?
        groups << [codepoints]
      else
        groups.last << codepoints
      end
    end

    emoji_dir = Rails.root.join('public', 'emoji')

    # One hash per group: codepoints => whether an SVG with that name exists.
    existence_maps = grouped_codes.map do |group|
      group.map { |codepoints| [codepoints, File.exist?(emoji_dir.join(codepoints_to_filename(codepoints) + '.svg'))] }.to_h
    end

    map = {}

    existence_maps.each do |group|
      existing_one = group.key(true)

      # None of the variants has an image file: there is nothing to map to,
      # and codepoints_to_filename(nil) would raise NoMethodError.
      next if existing_one.nil?

      filename = codepoints_to_filename(existing_one)

      group.each_key do |key|
        map[codepoints_to_unicode(key)] = filename
      end
    end

    # Order entries by the character length of their unicode key, shortest first.
    map = map.sort { |a, b| a[0].size <=> b[0].size }.to_h

    File.write(dest, Oj.dump(map))
    puts "Wrote emojo to destination! (#{dest})"
  end
end
 | 
			
		||||
@@ -22,10 +22,10 @@ describe('emojify', () => {
 | 
			
		||||
 | 
			
		||||
  it('does unicode', () => {
 | 
			
		||||
    expect(emojify('\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC66\u200D\uD83D\uDC66')).to.equal(
 | 
			
		||||
      '<img draggable="false" class="emojione" alt="👩👩👦👦" title=":family_wwbb:" src="/emoji/1f469-1f469-1f466-1f466.svg" />');
 | 
			
		||||
    expect(emojify('\uD83D\uDC68\uD83D\uDC69\uD83D\uDC67\uD83D\uDC67')).to.equal(
 | 
			
		||||
      '<img draggable="false" class="emojione" alt="👨👩👧👧" title=":family_mwgg:" src="/emoji/1f468-1f469-1f467-1f467.svg" />');
 | 
			
		||||
    expect(emojify('\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66')).to.equal('<img draggable="false" class="emojione" alt="👩👩👦" title=":family_wwb:" src="/emoji/1f469-1f469-1f466.svg" />');
 | 
			
		||||
      '<img draggable="false" class="emojione" alt="👩👩👦👦" title=":woman-woman-boy-boy:" src="/emoji/1f469-200d-1f469-200d-1f466-200d-1f466.svg" />');
 | 
			
		||||
    expect(emojify('👨👩👧👧')).to.equal(
 | 
			
		||||
      '<img draggable="false" class="emojione" alt="👨👩👧👧" title=":man-woman-girl-girl:" src="/emoji/1f468-200d-1f469-200d-1f467-200d-1f467.svg" />');
 | 
			
		||||
    expect(emojify('👩👩👦')).to.equal('<img draggable="false" class="emojione" alt="👩👩👦" title=":woman-woman-boy:" src="/emoji/1f469-200d-1f469-200d-1f466.svg" />');
 | 
			
		||||
    expect(emojify('\u2757')).to.equal(
 | 
			
		||||
      '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />');
 | 
			
		||||
  });
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user