Japanese and more generally CJK typesetting

From LuaTeXWiki
Revision as of 23:15, 7 December 2010 by Patrick (talk | contribs) (Copied from the old bluwiki.com luatex wiki)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

First define the following tables and functions in Lua:

-- Set of code points consulted by is_CJK_le: the CJK opening brackets
-- U+3008..U+301A (even code points), U+301D (reversed double prime quotation
-- mark) and the fullwidth forms of ( [ {.
table_le = {}
for _, codepoint in ipairs({
   0x3008, 0x300A, 0x300C, 0x300E,
   0x3010, 0x3014, 0x3016, 0x3018, 0x301A,
   0x301D,
   0xFF08, 0xFF3B, 0xFF5B,
}) do
   table_le[codepoint] = true
end

-- Set of code points consulted by is_CJK_re: ideographic comma and full stop,
-- the CJK closing brackets U+3009..U+3019 (odd code points), U+301F (low
-- double prime quotation mark) and the fullwidth forms of ) , . ] }.
table_re = {}
for _, codepoint in ipairs({
   0x3001, 0x3002,
   0x3009, 0x300B, 0x300D, 0x300F,
   0x3011, 0x3015, 0x3017, 0x3019,
   0x301F,
   0xFF09, 0xFF0C, 0xFF0E, 0xFF3D, 0xFF5D,
}) do
   table_re[codepoint] = true
end

-- Set of small kana code points (consulted by is_CJK_ka).
-- Hiragana: small a, i, u, e, o, tsu, ya, yu, yo, wa.
-- Katakana: small a, i, u, e, o, tsu, ya, yu, yo, wa, ka, ke.
-- The original listed 0x3083 twice; the second occurrence is replaced by
-- 0x308E (hiragana small wa), the counterpart of katakana small wa 0x30EE.
table_sk = {[0x3041]=true, [0x3043]=true, [0x3045]=true, [0x3047]=true, 
[0x3049]=true, [0x3063]=true, [0x3083]=true, [0x3085]=true, [0x3087]=true, 
[0x308E]=true, [0x30A1]=true, [0x30A3]=true, [0x30A5]=true, [0x30A7]=true, 
[0x30A9]=true, [0x30C3]=true, [0x30E3]=true, [0x30E5]=true, [0x30E7]=true, 
[0x30EE]=true, [0x30F5]=true, [0x30F6]=true}

-- Set of auxiliary marks (consulted by is_CJK_ka).
table_aux = {
   [0x3005] = true, -- ideographic iteration mark
   [0x3006] = true, -- ideographic closing mark
   [0x301C] = true, -- wave dash
   [0x309D] = true, -- hiragana iteration mark
   [0x309E] = true, -- hiragana voiced iteration mark
   [0x30FD] = true, -- katakana iteration mark
   [0x30FE] = true, -- katakana voiced iteration mark
}

-- Set of ASCII opening brackets, keyed by byte value: "(" and "[".
table_ll = {
   [("("):byte()] = true,
   [("["):byte()] = true,
}

-- Set of ASCII closing brackets and punctuation, keyed by byte value:
-- ) ] ! , . : ; ?
table_rl = {}
for ch in (")]!,.:;?"):gmatch(".") do
   table_rl[ch:byte()] = true
end

--- Tell whether a code point lies in one of the two ranges this file treats
-- as CJK: U+3000..U+D7A7 or U+FF00..U+FFE0 (both inclusive).
-- @param id code point (number)
-- @return true or false
function is_CJK(id)
   local in_main_range = id >= 0x3000 and id <= 0xD7A7
   local in_fullwidth_range = id >= 0xFF00 and id <= 0xFFE0
   return in_main_range or in_fullwidth_range
end

--- Look a code point up in table_le.
-- @param id code point (number)
-- @return true when id is a key of table_le, nil otherwise
function is_CJK_le(id)
   local member = table_le[id]
   return member
end

--- Look a code point up in table_re.
-- @param id code point (number)
-- @return true when id is a key of table_re, nil otherwise
function is_CJK_re(id)
   local member = table_re[id]
   return member
end

--- Tell whether a code point is CJK (per is_CJK) and is not listed in table_re.
-- @param id code point (number)
-- @return the falsy result of is_CJK(id) when id is not CJK; otherwise a
--         boolean that is true exactly when id is absent from table_re
function is_CJK_karenb(id)
   local cjk = is_CJK(id)
   if not cjk then
      return cjk
   end
   return not table_re[id]
end

--- Look a code point up in table_ll.
-- @param id code point (number)
-- @return true when id is a key of table_ll, nil otherwise
function is_CJK_ll(id)
   local member = table_ll[id]
   return member
end

--- Look a code point up in table_rl.
-- The original consulted table_ll here (copy/paste slip), which made this
-- function identical to is_CJK_ll and left table_rl unused.
-- @param id code point (number)
-- @return true when id is a key of table_rl, nil otherwise
function is_CJK_rl(id) return table_rl[id] end

function is_CJK_ka(id) return (is_CJK(id) and (not (table_re[id] or table_le[id] or table_sk[id] or table_aux[id]))) end

--- Insert a penalty node with value 10000 directly after node t.
-- The new node is kept in a local so the function no longer overwrites a
-- global named `n`.
-- @param head head of the node list that contains t
-- @param t    node after which the penalty is inserted
function insert_penalty(head,t)
   local penalty_node = node.new("penalty")
   penalty_node.penalty = 10000
   node.insert_after(head, t, penalty_node)
end

--- Insert a glue node after t with zero natural width, a finite stretch of
-- 5% of the size of t's font, and no shrink.
-- The nodes are held in locals; the original leaked `n` and `gs` as globals.
-- NOTE(review): this uses a separate glue_spec node attached through the
-- glue's `spec` field; confirm that the target LuaTeX version still offers
-- that interface.
-- @param head head of the node list that contains t
-- @param t    node with a `font` field, after which the glue is inserted
function insert_small_glue(head,t)
   local glue = node.new("glue")
   local spec = node.new("glue_spec")
   local size = font.fonts[t.font].size
   spec.width = 0
   spec.stretch = 0.05 * size
   spec.stretch_order = 0
   spec.shrink = 0
   spec.shrink_order = 0
   glue.spec = spec
   node.insert_after(head, t, glue)
end

--- Insert a glue node after t whose natural width is minus half the size of
-- t's font, with a stretch of half the font size and a shrink of 5% of it
-- (both finite order).
-- The nodes are held in locals; the original leaked `n` and `gs` as globals.
-- The font size is looked up once instead of three times.
-- NOTE(review): this uses a separate glue_spec node attached through the
-- glue's `spec` field; confirm that the target LuaTeX version still offers
-- that interface.
-- @param head head of the node list that contains t
-- @param t    node with a `font` field, after which the glue is inserted
function insert_half_glue(head,t)
   local glue = node.new("glue")
   local spec = node.new("glue_spec")
   local size = font.fonts[t.font].size
   spec.width = -0.5 * size
   spec.stretch = 0.5 * size
   spec.stretch_order = 0
   spec.shrink = 0.05 * size
   spec.shrink_order = 0
   glue.spec = spec
   node.insert_after(head, t, glue)
end

--- Insert a glue node after t whose natural width is minus the full size of
-- t's font, with a stretch of one font size and a shrink of 5% of it (both
-- finite order).
-- The nodes are held in locals; the original leaked `n` and `gs` as globals.
-- The font size is looked up once instead of three times.
-- NOTE(review): this uses a separate glue_spec node attached through the
-- glue's `spec` field; confirm that the target LuaTeX version still offers
-- that interface.
-- @param head head of the node list that contains t
-- @param t    node with a `font` field, after which the glue is inserted
function insert_double_half(head,t)
   local glue = node.new("glue")
   local spec = node.new("glue_spec")
   local size = font.fonts[t.font].size
   spec.width = -size
   spec.stretch = size
   spec.stretch_order = 0
   spec.shrink = 0.05 * size
   spec.shrink_order = 0
   glue.spec = spec
   node.insert_after(head, t, glue)
end

Then use the following callback:

-- Kinsoku pass registered on the pre_linebreak_filter callback.
-- Only nodes carrying the attribute whose number is stored in the global
-- `kinsoku` are examined.  For each such glyph the pass
--   1. removes a glue node sitting directly between two CJK glyphs, and
--   2. inserts glue (plus a penalty of 10000 in some cases) after the glyph,
--      chosen from the classes of the glyph and of the glyph that follows it.
-- Arguments: head of the node list, the group code, and a third argument that
-- is not used.  The function returns true; the head node itself is never
-- replaced because every insertion and removal happens after an existing node.
-- Changes from the original: `head` and `l` are no longer globals, the loop
-- variable no longer shadows the parameter, and the glue is unlinked with
-- node.remove instead of a manual `next` assignment.
-- NOTE(review): has_attribute is used as a plain truthiness test; whether the
-- value written by \stopkinsoku counts as unset depends on the LuaTeX
-- version -- confirm.
callback.register('pre_linebreak_filter',
function(head,groupcode,glyphes)
   local node_id_glyph=node.id("glyph")
   local node_id_glue=node.id("glue")
   for t in node.traverse(head) do
      if (node.has_attribute(t,kinsoku)) then
         -- Step 1: glyph, glue, glyph with both glyphs CJK -> drop the glue
         -- (presumably the space produced by a line end in the source).
         local gap = t.next
         local after = gap and gap.next
         if (after and t.id == node_id_glyph and gap.id == node_id_glue and after.id == node_id_glyph and is_CJK(t.char) and is_CJK(after.char)) then
            head = node.remove(head, gap)
            node.free(gap)
         end
         -- Step 2: two adjacent glyphs.  The successor's character is read
         -- before anything is inserted, because insertion changes t.next.
         -- Both helpers insert directly after t, so a penalty inserted second
         -- ends up in front of the glue inserted first.
         local nxt = t.next
         if (nxt and t.id == node_id_glyph and nxt.id == node_id_glyph) then
            local cur_char = t.char
            local next_char = nxt.char
            if (is_CJK(next_char)) then
               -- Successor is CJK: dispatch on the class of the current char.
               if (is_CJK_le(cur_char)) then
                  insert_small_glue(head,t)
                  insert_penalty(head,t)
               elseif (is_CJK_re(cur_char)) then
                  if (is_CJK_karenb(next_char)) then
                     insert_half_glue(head,t)
                  else
                     insert_double_half(head,t)
                  end
               elseif (is_CJK_ll(cur_char)) then
                  if (is_CJK_karenb(next_char)) then
                     insert_small_glue(head,t)
                     insert_penalty(head,t)
                  else
                     insert_half_glue(head,t)
                     insert_penalty(head,t)
                  end
               else
                  if (is_CJK_ka(next_char)) then
                     insert_small_glue(head,t)
                  elseif (is_CJK_le(next_char)) then
                     insert_half_glue(head,t)
                  else
                     insert_small_glue(head,t)
                     insert_penalty(head,t)
                  end
               end
            elseif (is_CJK(cur_char)) then
               -- Current char is CJK, successor is not.
               if (is_CJK_re(cur_char)) then
                  if (is_CJK_rl(next_char)) then
                     insert_half_glue(head,t)
                     insert_penalty(head,t)
                  else
                     insert_half_glue(head,t)
                  end
               elseif (is_CJK_le(cur_char) or is_CJK_rl(next_char)) then
                  insert_small_glue(head,t)
                  insert_penalty(head,t)
               else
                  insert_small_glue(head,t)
               end
            end
         end
      end
   end
return true
end )

And the following code in your TeX file:

% \newattribute{<name>}{<number>}: defines \<name> through \attributedef as
% attribute register <number>, and assigns the same number to a Lua global
% called <name> through \directlua, so Lua code can refer to the attribute.
\def\newattribute#1#2{%
\expandafter\attributedef \csname#1\endcsname #2
\directlua0{#1 = #2 }}
% Attribute 300 is used for kinsoku; the Lua global `kinsoku` is the value the
% callback passes to node.has_attribute.
\newattribute{kinsoku}{300}

% \startkinsoku sets the attribute to 1; \stopkinsoku sets it to -1.
% NOTE(review): whether -1 is treated as "unset" depends on the LuaTeX
% version -- confirm for the engine in use.
\def\startkinsoku{\kinsoku1}
\def\stopkinsoku{\kinsoku-1} % -1 signals 'unset'

As you can see, \startkinsoku turns kinsoku processing on and \stopkinsoku turns it off. You also need a font: take the code from the page "Use a TrueType font" and load any TrueType CJK font with it.