Clean XHTML code in xhtml2odt converter

Registered by Gaëtan Delannay

When converting XHTML to ODT in xhtml2odt.py, we should strip a series of tags together with their content, such as "meta" and "style" (at least as a first step). Here is an example of markup copied from Word and pasted into Kupu:

Point A29 = <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><meta name="ProgId"
content="Word.Document" /><meta name="Generator" content="Microsoft Word 11" /><meta name="Originator"
content="Microsoft Word 11" /><link href="file:///C:\DOCUME~1\NOWAKO~1\LOCALS~1\Temp\msohtml1\02\clip_filelist.xml" rel="File-List" /><style>
&amp;amp;amp;lt;!--
/* Font Definitions */
@font-face
{font-family:Verdana;
panose-1:2 11 6 4 3 5 4 4 2 4;
mso-font-charset:0;
mso-generic-font-family:swiss;
mso-font-pitch:variable;
mso-font-signature:536871559 0 0 0 415 0;}
@font-face
{font-family:Cambria;
panose-1:2 4 5 3 5 4 6 3 2 4;
mso-font-charset:0;
mso-generic-font-family:roman;
mso-font-pitch:variable;
mso-font-signature:-1610611985 1073741899 0 0 159 0;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{mso-style-parent:"";
margin:0cm;
margin-bottom:.0001pt;
mso-pagination:widow-orphan;
font-size:10.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
h1
{mso-style-next:Normal;
margin-top:12.0pt;
margin-right:0cm;
margin-bottom:3.0pt;
margin-left:0cm;
mso-pagination:widow-orphan;
page-break-after:avoid;
mso-outline-level:1;
font-size:16.0pt;
font-family:Cambria;
mso-font-kerning:16.0pt;}
@page Section1
{size:612.0pt 792.0pt;
margin:70.85pt 70.85pt 70.85pt 70.85pt;
mso-header-margin:36.0pt;
mso-footer-margin:36.0pt;
mso-paper-source:0;}
div.Section1
{page:Section1;}
/* List Definitions */
@list l0
{mso-list-id:1427996361;
mso-list-type:hybrid;
mso-list-template-ids:2073713530 -1 -1 -1 -1 -1 -1 -1 -1 -1;}
@list l0:level1
{mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-18.0pt;}
ol
{margin-bottom:0cm;}
ul
{margin-bottom:0cm;}
--&amp;amp;amp;gt;
</style>

Point A28 - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><meta name="ProgId" content="Word.Document" /><meta name="Generator" content="Microsoft Word 11" /><meta name="Originator" content="Microsoft Word 11" />
<ol><li><link href="file:///C:\DOCUME~1\NOWAKO~1\LOCALS~1\Temp\msohtml1\05\clip_filelist.xml" rel="File-List" /><link href="file:///C:\DOCUME~1\NOWAKO~1\LOCALS~1\Temp\msohtml1\05\clip_filelist.xml" rel="File-List" />
<style>
&amp;amp;lt;!--
/* Font Definitions */
@font-face
{font-family:Wingdings;
panose-1:5 0 0 0 0 0 0 0 0 0;
mso-font-charset:2;
mso-generic-font-family:auto;
mso-font-pitch:variable;
mso-font-signature:0 268435456 0 0 -2147483648 0;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{mso-style-parent:"";
margin:0cm;
margin-bottom:.0001pt;
mso-pagination:widow-orphan;
font-size:12.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
p.Paragraphedeliste, li.Paragraphedeliste, div.Paragraphedeliste
{mso-style-name:"Paragraphe de liste";
margin-top:0cm;
margin-right:0cm;
margin-bottom:0cm;
margin-left:36.0pt;
margin-bottom:.0001pt;
mso-add-space:auto;
mso-pagination:widow-orphan;
font-size:12.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
p.ParagraphedelisteCxSpFirst, li.ParagraphedelisteCxSpFirst, div.ParagraphedelisteCxSpFirst
{mso-style-name:"Paragraphe de listeCxSpFirst";
mso-style-type:export-only;
margin-top:0cm;
margin-right:0cm;
margin-bottom:0cm;
margin-left:36.0pt;
margin-bottom:.0001pt;
mso-add-space:auto;
mso-pagination:widow-orphan;
font-size:12.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
p.ParagraphedelisteCxSpMiddle, li.ParagraphedelisteCxSpMiddle, div.ParagraphedelisteCxSpMiddle
{mso-style-name:"Paragraphe de listeCxSpMiddle";
mso-style-type:export-only;
margin-top:0cm;
margin-right:0cm;
margin-bottom:0cm;
margin-left:36.0pt;
margin-bottom:.0001pt;
mso-add-space:auto;
mso-pagination:widow-orphan;
font-size:12.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
p.ParagraphedelisteCxSpLast, li.ParagraphedelisteCxSpLast, div.ParagraphedelisteCxSpLast
{mso-style-name:"Paragraphe de listeCxSpLast";
mso-style-type:export-only;
margin-top:0cm;
margin-right:0cm;
margin-bottom:0cm;
margin-left:36.0pt;
margin-bottom:.0001pt;
mso-add-space:auto;
mso-pagination:widow-orphan;
font-size:12.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
@page Section1
{size:612.0pt 792.0pt;
margin:70.85pt 70.85pt 70.85pt 70.85pt;
mso-header-margin:36.0pt;
mso-footer-margin:36.0pt;
mso-paper-source:0;}
div.Section1
{page:Section1;}
/* List Definitions */
@list l0
{mso-list-id:774984297;
mso-list-type:hybrid;
mso-list-template-ids:1562389278 599454246 135004163 135004165 135004161 135004163 135004165 135004161 135004163 135004165;}
@list l0:level1
{mso-level-start-at:12;
mso-level-number-format:bullet;
mso-level-text:-;
mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-18.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
ol
{margin-bottom:0cm;}
ul
{margin-bottom:0cm;}
--&amp;amp;gt;
</style> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><meta name="ProgId" content="Word.Document" /><meta name="Generator" content="Microsoft Word 11" /><meta name="Originator" content="Microsoft Word 11" /><style>
&amp;amp;lt;!--
/* Font Definitions */
@font-face
{font-family:Wingdings;
panose-1:5 0 0 0 0 0 0 0 0 0;
mso-font-charset:2;
mso-generic-font-family:auto;
mso-font-pitch:variable;
mso-font-signature:0 268435456 0 0 -2147483648 0;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{mso-style-parent:"";
margin:0cm;
margin-bottom:.0001pt;
mso-pagination:widow-orphan;
font-size:12.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
p.Paragraphedeliste, li.Paragraphedeliste, div.Paragraphedeliste
{mso-style-name:"Paragraphe de liste";
margin-top:0cm;
margin-right:0cm;
margin-bottom:0cm;
margin-left:36.0pt;
margin-bottom:.0001pt;
mso-add-space:auto;
mso-pagination:widow-orphan;
font-size:12.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
p.ParagraphedelisteCxSpFirst, li.ParagraphedelisteCxSpFirst, div.ParagraphedelisteCxSpFirst
{mso-style-name:"Paragraphe de listeCxSpFirst";
mso-style-type:export-only;
margin-top:0cm;
margin-right:0cm;
margin-bottom:0cm;
margin-left:36.0pt;
margin-bottom:.0001pt;
mso-add-space:auto;
mso-pagination:widow-orphan;
font-size:12.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
p.ParagraphedelisteCxSpMiddle, li.ParagraphedelisteCxSpMiddle, div.ParagraphedelisteCxSpMiddle
{mso-style-name:"Paragraphe de listeCxSpMiddle";
mso-style-type:export-only;
margin-top:0cm;
margin-right:0cm;
margin-bottom:0cm;
margin-left:36.0pt;
margin-bottom:.0001pt;
mso-add-space:auto;
mso-pagination:widow-orphan;
font-size:12.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
p.ParagraphedelisteCxSpLast, li.ParagraphedelisteCxSpLast, div.ParagraphedelisteCxSpLast
{mso-style-name:"Paragraphe de listeCxSpLast";
mso-style-type:export-only;
margin-top:0cm;
margin-right:0cm;
margin-bottom:0cm;
margin-left:36.0pt;
margin-bottom:.0001pt;
mso-add-space:auto;
mso-pagination:widow-orphan;
font-size:12.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
@page Section1
{size:612.0pt 792.0pt;
margin:70.85pt 70.85pt 70.85pt 70.85pt;
mso-header-margin:36.0pt;
mso-footer-margin:36.0pt;
mso-paper-source:0;}
div.Section1
{page:Section1;}
/* List Definitions */
@list l0
{mso-list-id:774984297;
mso-list-type:hybrid;
mso-list-template-ids:1562389278 599454246 135004163 135004165 135004161 135004163 135004165 135004161 135004163 135004165;}
@list l0:level1
{mso-level-start-at:12;
mso-level-number-format:bullet;
mso-level-text:-;
mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-18.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
ol
{margin-bottom:0cm;}
ul
{margin-bottom:0cm;}
--&amp;amp;gt;
</style>

Point A27 - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><meta name="ProgId" content="Word.Document" /><meta name="Generator" content="Microsoft Word 11" /><meta name="Originator" content="Microsoft Word 11" />
<link href="file:///C:\DOCUME~1\NOWAKO~1\LOCALS~1\Temp\msohtml1\05\clip_filelist.xml" rel="File-List" />
<style>
&amp;amp;lt;!--
/* Font Definitions */
@font-face
{font-family:Wingdings;
panose-1:5 0 0 0 0 0 0 0 0 0;
mso-font-charset:2;
mso-generic-font-family:auto;
mso-font-pitch:variable;
mso-font-signature:0 268435456 0 0 -2147483648 0;}
@font-face
{font-family:Tahoma;
panose-1:2 11 6 4 3 5 4 4 2 4;
mso-font-charset:0;
mso-generic-font-family:swiss;
mso-font-pitch:variable;
mso-font-signature:1627421319 -2147483648 8 0 66047 0;}
@font-face
{font-family:Verdana;
panose-1:2 11 6 4 3 5 4 4 2 4;
mso-font-charset:0;
mso-generic-font-family:swiss;
mso-font-pitch:variable;
mso-font-signature:536871559 0 0 0 415 0;}
@font-face
{font-family:"Century Gothic";
panose-1:2 11 5 2 2 2 2 2 2 4;
mso-font-charset:0;
mso-generic-font-family:swiss;
mso-font-pitch:variable;
mso-font-signature:647 0 0 0 159 0;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{mso-style-parent:"";
margin:0cm;
margin-bottom:.0001pt;
mso-pagination:widow-orphan;
font-size:12.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
p
{mso-margin-top-alt:auto;
margin-right:0cm;
mso-margin-bottom-alt:auto;
margin-left:0cm;
mso-pagination:widow-orphan;
font-size:12.0pt;
font-family:"Times New Roman";
mso-fareast-font-family:"Times New Roman";}
@page Section1
{size:612.0pt 792.0pt;
margin:70.85pt 70.85pt 70.85pt 70.85pt;
mso-header-margin:36.0pt;
mso-footer-margin:36.0pt;
mso-paper-source:0;}
div.Section1
{page:Section1;}
/* List Definitions */
@list l0
{mso-list-id:1996564220;
mso-list-type:hybrid;
mso-list-template-ids:1961685060 1179260850 135004163 135004165 135004161 135004163 135004165 135004161 135004163 135004165;}
@list l0:level1
{mso-level-start-at:0;
mso-level-number-format:bullet;
mso-level-text:-;
mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-18.0pt;
font-family:Verdana;
mso-fareast-font-family:"Times New Roman";
mso-bidi-font-family:"Times New Roman";}
ol
{margin-bottom:0cm;}
ul
{margin-bottom:0cm;}
--&amp;amp;gt;
</style>

Blueprint information

Status:
Complete
Approver:
Gaëtan Delannay
Priority:
Undefined
Drafter:
Gaëtan Delannay
Direction:
Needs approval
Assignee:
Gaëtan Delannay
Definition:
Obsolete
Series goal:
None
Implementation:
Unknown
Milestone target:
None
Completed by:
Gaëtan Delannay

Related branches

Sprints

Whiteboard

This will not be done at the pod level. Instead, appy integrates an XHTML cleaner, appy.shared.xml_parser.XhtmlCleaner, which must be called beforehand, i.e. before the XHTML is handed to pod (for example, appy.gen calls it on String fields with format=String.XHTML).

(?)

Work Items

This blueprint contains Public information 
Everyone can see this information.

Subscribers

No subscribers.