var assert = require("assert"); var path = require("path"); var fs = require("fs"); var _ = require("underscore"); var mammoth = require("../"); var promises = require("../lib/promises"); var results = require("../lib/results"); var testing = require("./testing"); var test = require("./test")(module); var testData = testing.testData; var createFakeDocxFile = testing.createFakeDocxFile; test('should convert docx containing one paragraph to single p element', function() { var docxPath = path.join(__dirname, "test-data/single-paragraph.docx"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { assert.equal(result.value, "

Walking on imported air

"); assert.deepEqual(result.messages, []); }); }); test('should convert docx represented by a Buffer', function() { var docxPath = path.join(__dirname, "test-data/single-paragraph.docx"); return promises.nfcall(fs.readFile, docxPath) .then(function(buffer) { return mammoth.convertToHtml({buffer: buffer}); }) .then(function(result) { assert.equal(result.value, "

Walking on imported air

"); assert.deepEqual(result.messages, []); }); }); test('should read docx xml files with unicode byte order mark', function() { var docxPath = path.join(__dirname, "test-data/utf8-bom.docx"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { assert.equal(result.value, "

This XML has a byte order mark.

"); assert.deepEqual(result.messages, []); }); }); test('empty paragraphs are ignored by default', function() { var docxPath = path.join(__dirname, "test-data/empty.docx"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { assert.equal(result.value, ""); assert.deepEqual(result.messages, []); }); }); test('empty paragraphs are preserved if ignoreEmptyParagraphs is false', function() { var docxPath = path.join(__dirname, "test-data/empty.docx"); return mammoth.convertToHtml({path: docxPath}, {ignoreEmptyParagraphs: false}).then(function(result) { assert.equal(result.value, "

"); assert.deepEqual(result.messages, []); }); }); test('style map can be expressed as string', function() { var docxFile = createFakeDocxFile({ "word/document.xml": testData("simple/word/document.xml") }); var options = { styleMap: "p => h1" }; return mammoth.convertToHtml({file: docxFile}, options).then(function(result) { assert.equal("

Hello.

", result.value); }); }); test('style map can be expressed as array of style mappings', function() { var docxFile = createFakeDocxFile({ "word/document.xml": testData("simple/word/document.xml") }); var options = { styleMap: ["p => h1"] }; return mammoth.convertToHtml({file: docxFile}, options).then(function(result) { assert.equal("

Hello.

", result.value); }); }); test('embedded style map is used if present', function() { var docxPath = path.join(__dirname, "test-data/embedded-style-map.docx"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { assert.equal(result.value, "

Walking on imported air

"); assert.deepEqual(result.messages, []); }); }); test('explicit style map takes precedence over embedded style map', function() { var docxPath = path.join(__dirname, "test-data/embedded-style-map.docx"); var options = { styleMap: ["p => p"] }; return mammoth.convertToHtml({path: docxPath}, options).then(function(result) { assert.equal(result.value, "

Walking on imported air

"); assert.deepEqual(result.messages, []); }); }); test('explicit style map is combined with embedded style map', function() { var docxPath = path.join(__dirname, "test-data/embedded-style-map.docx"); var options = { styleMap: ["r => strong"] }; return mammoth.convertToHtml({path: docxPath}, options).then(function(result) { assert.equal(result.value, "

Walking on imported air

"); assert.deepEqual(result.messages, []); }); }); test('embedded style maps can be disabled', function() { var docxPath = path.join(__dirname, "test-data/embedded-style-map.docx"); var options = { includeEmbeddedStyleMap: false }; return mammoth.convertToHtml({path: docxPath}, options).then(function(result) { assert.equal(result.value, "

Walking on imported air

"); assert.deepEqual(result.messages, []); }); }); test('embedded style map can be written and then read', function() { var docxPath = path.join(__dirname, "test-data/single-paragraph.docx"); return promises.nfcall(fs.readFile, docxPath) .then(function(buffer) { return mammoth.embedStyleMap({buffer: buffer}, "p => h1"); }) .then(function(docx) { return mammoth.convertToHtml({buffer: docx.toBuffer()}); }) .then(function(result) { assert.equal(result.value, "

Walking on imported air

"); assert.deepEqual(result.messages, []); }); }); test('embedded style map can be retrieved', function() { var docxPath = path.join(__dirname, "test-data/single-paragraph.docx"); return promises.nfcall(fs.readFile, docxPath) .then(function(buffer) { return mammoth.embedStyleMap({buffer: buffer}, "p => h1"); }) .then(function(docx) { return mammoth.readEmbeddedStyleMap({buffer: docx.toBuffer()}); }) .then(function(styleMap) { assert.equal(styleMap, "p => h1"); }); }); test('warning if style mapping is not understood', function() { var docxPath = path.join(__dirname, "test-data/single-paragraph.docx"); var options = { styleMap: "????\np => h1" }; return mammoth.convertToHtml({path: docxPath}, options).then(function(result) { assert.equal("

Walking on imported air

", result.value); var warning = "Did not understand this style mapping, so ignored it: ????\n" + 'Error was at character number 1: Expected element type but got unrecognisedCharacter "?"'; assert.deepEqual(result.messages, [results.warning(warning)]); }); }); test('options are passed to document converter when calling mammoth.convertToHtml', function() { var docxFile = createFakeDocxFile({ "word/document.xml": testData("simple/word/document.xml") }); var options = { styleMap: "p => h1" }; return mammoth.convertToHtml({file: docxFile}, options).then(function(result) { assert.equal("

Hello.

", result.value); }); }); test('options.transformDocument is used to transform document if set', function() { var docxFile = createFakeDocxFile({ "word/document.xml": testData("simple/word/document.xml") }); var options = { transformDocument: function(document) { document.children[0].styleId = "Heading1"; return document; } }; return mammoth.convertToHtml({file: docxFile}, options).then(function(result) { assert.equal("

Hello.

", result.value); }); }); test('mammoth.transforms.paragraph only transforms paragraphs', function() { var docxFile = createFakeDocxFile({ "word/document.xml": testData("simple/word/document.xml") }); var options = { transformDocument: mammoth.transforms.paragraph(function(paragraph) { return _.extend(paragraph, {styleId: "Heading1"}); }) }; return mammoth.convertToHtml({file: docxFile}, options).then(function(result) { assert.equal("

Hello.

", result.value); }); }); test('inline images referenced by path relative to part are included in output', function() { var docxPath = path.join(__dirname, "test-data/tiny-picture.docx"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { assert.equal(result.value, '

'); }); }); test('inline images referenced by path relative to base are included in output', function() { var docxPath = path.join(__dirname, "test-data/tiny-picture-target-base-relative.docx"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { assert.equal(result.value, '

'); }); }); test('src of inline images can be changed', function() { var docxPath = path.join(__dirname, "test-data/tiny-picture.docx"); var convertImage = mammoth.images.imgElement(function(element) { return element.read("base64").then(function(encodedImage) { return {src: encodedImage.substring(0, 2) + "," + element.contentType}; }); }); return mammoth.convertToHtml({path: docxPath}, {convertImage: convertImage}).then(function(result) { assert.equal(result.value, '

'); }); }); test('images stored outside of document are included in output', function() { var docxPath = path.join(__dirname, "test-data/external-picture.docx"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { assert.equal(result.value, '

'); assert.deepEqual(result.messages, []); }); }); test('error if images stored outside of document are specified when passing file without path', function() { var docxPath = path.join(__dirname, "test-data/external-picture.docx"); var buffer = fs.readFileSync(docxPath); return mammoth.convertToHtml({buffer: buffer}).then(function(result) { assert.equal(result.value, ''); assert.equal(result.messages[0].message, "could not find external image 'tiny-picture.png', path of input document is unknown"); assert.equal(result.messages[0].type, "error"); }); }); test('simple list is converted to list elements', function() { var docxPath = path.join(__dirname, "test-data/simple-list.docx"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { assert.equal(result.value, ''); }); }); test('word tables are converted to html tables', function() { var docxPath = path.join(__dirname, "test-data/tables.docx"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { var expectedHtml = "

Above

" + "" + "" + "" + "

Top left

Top right

Bottom left

Bottom right

" + "

Below

"; assert.equal(result.value, expectedHtml); assert.deepEqual(result.messages, []); }); }); test('footnotes are appended to text', function() { // TODO: don't duplicate footnotes with multiple references var docxPath = path.join(__dirname, "test-data/footnotes.docx"); var options = { idPrefix: "doc-42-" }; return mammoth.convertToHtml({path: docxPath}, options).then(function(result) { var expectedOutput = '

Ouch' + '[1].' + '[2]

' + '
  1. A tachyon walks into a bar.

  2. ' + '
  3. Fin.

'; assert.equal(result.value, expectedOutput); assert.deepEqual(result.messages, []); }); }); test('endnotes are appended to text', function() { var docxPath = path.join(__dirname, "test-data/endnotes.docx"); var options = { idPrefix: "doc-42-" }; return mammoth.convertToHtml({path: docxPath}, options).then(function(result) { var expectedOutput = '

Ouch' + '[1].' + '[2]

' + '
  1. A tachyon walks into a bar.

  2. ' + '
  3. Fin.

'; assert.equal(result.value, expectedOutput); assert.deepEqual(result.messages, []); }); }); test('relationships are handled properly in footnotes', function() { var docxPath = path.join(__dirname, "test-data/footnote-hyperlink.docx"); var options = { idPrefix: "doc-42-" }; return mammoth.convertToHtml({path: docxPath}, options).then(function(result) { var expectedOutput = '

[1]

' + '
  1. Example

'; assert.equal(result.value, expectedOutput); assert.deepEqual(result.messages, []); }); }); test('when style mapping is defined for comment references then comments are included', function() { var docxPath = path.join(__dirname, "test-data/comments.docx"); var options = { idPrefix: "doc-42-", styleMap: "comment-reference => sup" }; return mammoth.convertToHtml({path: docxPath}, options).then(function(result) { var expectedOutput = ( '

Ouch' + '[MW1].' + '[MW2]

' + '
Comment [MW1]

A tachyon walks into a bar.

' + '
Comment [MW2]

Fin.

' ); assert.equal(result.value, expectedOutput); assert.deepEqual(result.messages, []); }); }); test('textboxes are read', function() { var docxPath = path.join(__dirname, "test-data/text-box.docx"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { var expectedOutput = '

Datum plane

'; assert.equal(result.value, expectedOutput); }); }); test('underline is ignored by default', function() { var docxPath = path.join(__dirname, "test-data/underline.docx"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { assert.equal(result.value, '

The Sunset Tree

'); }); }); test('underline can be configured with style mapping', function() { var docxPath = path.join(__dirname, "test-data/underline.docx"); return mammoth.convertToHtml({path: docxPath}, {styleMap: "u => em"}).then(function(result) { assert.equal(result.value, '

The Sunset Tree

'); }); }); test('strikethrough is converted to by default', function() { var docxPath = path.join(__dirname, "test-data/strikethrough.docx"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { assert.equal(result.value, "

Today's Special: Salmon Sold out

"); }); }); test('strikethrough conversion can be configured with style mappings', function() { var docxPath = path.join(__dirname, "test-data/strikethrough.docx"); return mammoth.convertToHtml({path: docxPath}, {styleMap: "strike => del"}).then(function(result) { assert.equal(result.value, "

Today's Special: Salmon Sold out

"); }); }); test('indentation is used if prettyPrint is true', function() { var docxPath = path.join(__dirname, "test-data/single-paragraph.docx"); return mammoth.convertToHtml({path: docxPath}, {prettyPrint: true}).then(function(result) { assert.equal(result.value, "

\n Walking on imported air\n

"); assert.deepEqual(result.messages, []); }); }); test('using styleMapping throws error', function() { try { mammoth.styleMapping(); } catch (error) { assert.equal( error.message, 'Use a raw string instead of mammoth.styleMapping e.g. "p[style-name=\'Title\'] => h1" instead of mammoth.styleMapping("p[style-name=\'Title\'] => h1")' ); } }); test('can convert single paragraph to markdown', function() { var docxPath = path.join(__dirname, "test-data/single-paragraph.docx"); return mammoth.convertToMarkdown({path: docxPath}).then(function(result) { assert.equal(result.value, "Walking on imported air\n\n"); assert.deepEqual(result.messages, []); }); }); test('extractRawText only retains raw text', function() { var docxPath = path.join(__dirname, "test-data/simple-list.docx"); return mammoth.extractRawText({path: docxPath}).then(function(result) { assert.equal(result.value, 'Apple\n\nBanana\n\n'); }); }); test('extractRawText can use .docx files represented by a Buffer', function() { var docxPath = path.join(__dirname, "test-data/single-paragraph.docx"); return promises.nfcall(fs.readFile, docxPath) .then(function(buffer) { return mammoth.extractRawText({buffer: buffer}); }) .then(function(result) { assert.equal(result.value, "Walking on imported air\n\n"); assert.deepEqual(result.messages, []); }); }); test('should throw error if file is not a valid docx document', function() { var docxPath = path.join(__dirname, "test-data/empty.zip"); return mammoth.convertToHtml({path: docxPath}).then(function(result) { assert.ok(false, "Expected error"); }, function(error) { assert.equal(error.message, "Could not find main document part. Are you sure this is a valid .docx file?"); }); });