<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://wiki.luatex.org/api.php?action=feedcontributions&amp;feedformat=atom&amp;user=Esteis</id>
	<title>LuaTeXWiki - User contributions [en]</title>
	<link rel="self" type="application/atom+xml" href="https://wiki.luatex.org/api.php?action=feedcontributions&amp;feedformat=atom&amp;user=Esteis"/>
	<link rel="alternate" type="text/html" href="https://wiki.luatex.org/index.php/Special:Contributions/Esteis"/>
	<updated>2026-04-22T12:51:26Z</updated>
	<subtitle>User contributions</subtitle>
	<generator>MediaWiki 1.31.1</generator>
	<entry>
		<id>https://wiki.luatex.org/index.php?title=Annotate_math_expressions&amp;diff=164</id>
		<title>Annotate math expressions</title>
		<link rel="alternate" type="text/html" href="https://wiki.luatex.org/index.php?title=Annotate_math_expressions&amp;diff=164"/>
		<updated>2012-06-12T09:48:00Z</updated>

		<summary type="html">&lt;p&gt;Esteis: One very long line of code was breaking horizontal scroll.&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;Now with the scripting language Lua having access to the TeX internals, it is quite easy to generate PDF annotations automatically.&lt;br /&gt;
An interesting example I started to explore is whether it would be possible to generate Content MathML expressions from the low level&lt;br /&gt;
TeX &amp;#039;&amp;#039;mathlist&amp;#039;&amp;#039; node representations exposed via the proper LuaTeX callback &amp;#039;&amp;#039;mlist_to_hlist&amp;#039;&amp;#039;.&lt;br /&gt;
&lt;br /&gt;
Succinctly, it is possible to generate Content MathML from simple math formulas; however, my initial approach using context-free grammar parsers (i.e. lpeg) is severely limited by the fact that the interpretation of LaTeX math expressions is rather context sensitive.&lt;br /&gt;
&lt;br /&gt;
A much simpler topic is how Lua(La)TeX could be used to automatically generate math expression bounding boxes in PDF documents, such&lt;br /&gt;
that extraction programs can reliably identify text areas in the PDF document that pertain to math formulas.&lt;br /&gt;
&lt;br /&gt;
The entire code snippets can be downloaded from: [https://gist.github.com/2018232 https://gist.github.com/2018232]&lt;br /&gt;
&lt;br /&gt;
We use two callback functions, &amp;#039;&amp;#039;mlist_to_hlist&amp;#039;&amp;#039; to insert a PDF annotation node and the callback &amp;#039;&amp;#039;pre_output_filter&amp;#039;&amp;#039; to identify the bounding box for a math formula.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
-- doesn&amp;#039;t yield anything interesting&lt;br /&gt;
function convertToMathML(head)&lt;br /&gt;
	return {tag=&amp;quot;not implemented&amp;quot;}&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- create content MathML for every math formula&lt;br /&gt;
luatexbase.add_to_callback(&amp;#039;mlist_to_hlist&amp;#039;,&lt;br /&gt;
function(head, display, penalty)&lt;br /&gt;
	texio.write_nl(&amp;#039;NEW mathlist&amp;#039;)&lt;br /&gt;
&lt;br /&gt;
	result = convertToMathML(head)&lt;br /&gt;
	if result ~= nil then&lt;br /&gt;
		et = etree.ElementTree({tag = &amp;quot;math&amp;quot;, result}, {decl = false})&lt;br /&gt;
		local pdf = node.new(&amp;quot;whatsit&amp;quot;, &amp;quot;pdf_annot&amp;quot;)&lt;br /&gt;
		local buffer = etree.StringBuffer()&lt;br /&gt;
		et:write(buffer)&lt;br /&gt;
		pdf.data = &amp;#039;/Subtype /MathML /Contents (&amp;#039; .. tostring(buffer) .. &amp;#039;)&amp;#039;&lt;br /&gt;
		head = node.insert_before(head, head, pdf)&lt;br /&gt;
	end&lt;br /&gt;
	return node.mlist_to_hlist(head, display, penalty)&lt;br /&gt;
end,&lt;br /&gt;
	&amp;quot;content MathML generator&amp;quot;)&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
In the callback above the function &amp;#039;&amp;#039;convertToMathML&amp;#039;&amp;#039; does not yield any interesting result:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
-- element tree, http://etree.luaforge.net/ (is a bit buggy for the {decl = false} option)&lt;br /&gt;
local el = require &amp;quot;etree&amp;quot;&lt;br /&gt;
&lt;br /&gt;
function convertToMathML(head)&lt;br /&gt;
	return {tag=&amp;quot;not implemented&amp;quot;}&lt;br /&gt;
end&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Now having tagged the math formula, we still need a bounding box. Luckily, the pre-output phase is&lt;br /&gt;
intercepted by the &amp;#039;&amp;#039;pre_output_filter&amp;#039;&amp;#039; callback and can be used to accomplish exactly that!&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
local vpack_counter = 1&lt;br /&gt;
&lt;br /&gt;
luatexbase.add_to_callback(&amp;#039;pre_output_filter&amp;#039;,&lt;br /&gt;
function(head)&lt;br /&gt;
	add_size_to_annot(head,{width=0,height=0,depth=0})&lt;br /&gt;
	-- viz.nodelist_visualize(head, &amp;quot;vpack&amp;quot;..vpack_counter..&amp;quot;.gv&amp;quot;)&lt;br /&gt;
	vpack_counter = vpack_counter + 1&lt;br /&gt;
	return head&lt;br /&gt;
end&lt;br /&gt;
	,&amp;quot;find math bounding box&amp;quot;)&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Of course, we need the &amp;#039;&amp;#039;add_size_to_annot&amp;#039;&amp;#039; function:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
local whatsit   = node.id(&amp;#039;whatsit&amp;#039;)&lt;br /&gt;
local hlist     = node.id(&amp;#039;hlist&amp;#039;)&lt;br /&gt;
local vlist     = node.id(&amp;#039;vlist&amp;#039;)&lt;br /&gt;
local math_node = node.id(&amp;#039;math&amp;#039;)&lt;br /&gt;
&lt;br /&gt;
local function add_size_to_annot(head, hbox)&lt;br /&gt;
	while head do&lt;br /&gt;
		typ = head.id&lt;br /&gt;
		if typ == vlist then&lt;br /&gt;
			add_size_to_annot(head.head, hbox)&lt;br /&gt;
		elseif typ == hlist then&lt;br /&gt;
			add_size_to_annot(head.head, {width=head.width,height=head.height,depth=head.depth})&lt;br /&gt;
		elseif typ == whatsit and head.subtype == 15 and&lt;br /&gt;
			string.sub(head.data, 1, 16) == &amp;#039;/Subtype /MathML&amp;#039; then&lt;br /&gt;
			if head.prev ~= nil and head.prev.id == math_node and head.prev.subtype == 0 then&lt;br /&gt;
				tail = head&lt;br /&gt;
				for test_node in node.traverse_id(math_node, head.next) do&lt;br /&gt;
					if test_node.subtype == 1 then&lt;br /&gt;
						tail = test_node&lt;br /&gt;
						break&lt;br /&gt;
					end&lt;br /&gt;
				end&lt;br /&gt;
				w, h, d = node.dimensions(head.prev, tail)&lt;br /&gt;
				hbox = {width=w,height=h,depth=d}&lt;br /&gt;
			end&lt;br /&gt;
			--[[ texio.write_nl(string.format(&amp;quot;add height %gpt, width %gpt, depth %gpt&amp;quot;,&lt;br /&gt;
                                 hbox.height / 2^16, &lt;br /&gt;
                                 hbox.width / 2^16, &lt;br /&gt;
                                 hbox.depth / 2^16))&lt;br /&gt;
                        --]]&lt;br /&gt;
			head.width  = hbox.width&lt;br /&gt;
			head.height = hbox.height&lt;br /&gt;
			head.depth  = hbox.depth&lt;br /&gt;
		else&lt;br /&gt;
			-- texio.write_nl(&amp;#039;found node &amp;#039; .. node.type(head.id))&lt;br /&gt;
		end&lt;br /&gt;
		head = head.next&lt;br /&gt;
	end&lt;br /&gt;
end&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
To use the code snippets shown above in your LaTeX documents, simply save them in a file named &amp;#039;&amp;#039;mathml.lua&amp;#039;&amp;#039; and load that file from the LaTeX document:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
\pdfcompresslevel=0 % to make everything visible in the pdf&lt;br /&gt;
\documentclass{article}&lt;br /&gt;
\usepackage{amssymb}&lt;br /&gt;
\usepackage{luacode}&lt;br /&gt;
&lt;br /&gt;
\directlua{dofile(&amp;quot;mathml.lua&amp;quot;)}&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Interestingly, with Mac OSX PDF Preview one can hover over the formula areas and the appropriate text content pops up.&lt;br /&gt;
&lt;br /&gt;
Alternatively, the Apache Java project PDFbox, [http://pdfbox.apache.org/index.html http://pdfbox.apache.org/index.html] may be quickly extended to allow for the extraction of previously tagged math areas, see here [https://gist.github.com/2018466 https://gist.github.com/2018466]&lt;/div&gt;</summary>
		<author><name>Esteis</name></author>
		
	</entry>
</feed>