Use our Document Optimization API to develop high-level, platform-independent software in Python. This is a powerful solution to reduce document and image size using Python.
Make large files smaller without loss of quality. Delete unnecessary and unused data. Shrink the size of Word and HTML files in Python. Our Python library provides developers with an integrated API to optimize documents in various formats.
This Python library is a stand-alone solution and doesn't require any third party software installed.
The maximum effect can be obtained by optimizing high-resolution images, as well as documents containing such images. Our Python library will help you to compress DOCX, DOC, RTF, ODT, EPUB, HTML, JPG, PNG files.
Optimization of documents without loss of quality made easy with 'Aspose.Words for Python via .NET'. The following example shows how to shrink the size of a Word document in Python:
pip install aspose-words
Copy
import aspose.words as aw
doc = aw.Document("Input.docx")
doc.cleanup()
shapes = [node.as_shape() for node in doc.get_child_nodes(aw.NodeType.SHAPE, True)]
for shape in shapes:
if shape.isImage:
# It's up to developer to choose the library for image compression.
image = Image.open(shape.image_data.to_stream())
# ...
# Compress image and set it back to the shape.
shape.image_data.set_image("yourCompressedImage")
save_options = aw.saving.OoxmlSaveOptions
save_options.compression_level = aw.saving.CompressionLevel.MAXIMUM
doc.save("Output.docx", save_options)
import aspose.words as aw
doc = aw.Document("Input.docx")
doc.cleanup()
shapes = [node.as_shape() for node in doc.get_child_nodes(aw.NodeType.SHAPE, True)]
for shape in shapes:
if shape.isImage:
# It's up to developer to choose the library for image compression.
image = Image.open(shape.image_data.to_stream())
# ...
# Compress image and set it back to the shape.
shape.image_data.set_image("yourCompressedImage")
save_options = aw.saving.PdfSaveOptions
save_options.cache_background_graphics = true
doc.save("Output.docx", save_options)
import aspose.words as aw
doc = aw.Document("Input.docx")
doc.cleanup()
shapes = [node.as_shape() for node in doc.get_child_nodes(aw.NodeType.SHAPE, True)]
for shape in shapes:
if shape.isImage:
# It's up to developer to choose the library for image compression.
image = Image.open(shape.image_data.to_stream())
# ...
# Compress image and set it back to the shape.
shape.image_data.set_image("yourCompressedImage")
doc.save("Output.docx")
import aspose.words as aw
doc = aw.Document("Input.docx")
doc.cleanup()
shapes = [node.as_shape() for node in doc.get_child_nodes(aw.NodeType.SHAPE, True)]
for shape in shapes:
if shape.isImage:
# It's up to developer to choose the library for image compression.
image = Image.open(shape.image_data.to_stream())
# ...
# Compress image and set it back to the shape.
shape.image_data.set_image("yourCompressedImage")
save_options = aw.saving.OoxmlSaveOptions
save_options.compression_level = aw.saving.CompressionLevel.MAXIMUM
doc.save("Output.docx", save_options)
import aspose.words as aw
doc = aw.Document()
builder = aw.DocumentBuilder(doc)
shape = builder.InsertImage("Input.docx")
save_options = aw.saving.ImageSaveOptions(aw.SaveFormat.DOCX)
shape.get_shape_renderer().save("Output.docx", save_options)
import aspose.words as aw
renderer = aw.pdf2word.fixedformats.PdfFixedRenderer()
pdf_read_options = aw.pdf2word.fixedformats.PdfFixedOptions()
pdf_read_options.image_format = aw.pdf2word.fixedformats.FixedImageFormat.JPEG
pdf_read_options.jpeg_quality = 50
with open ("Input.docx", 'rb') as pdf_stream:
pages_stream = renderer.save_pdf_as_images(pdf_stream, pdf_read_options);
builder = aw.DocumentBuilder()
for i in range(0, len(pages_stream)):
# Set maximum page size to avoid the current page image scaling.
max_page_dimension = 1584
page_setup = builder.page_setup
set_page_size(page_setup, max_page_dimension, max_page_dimension)
page_image = builder.insert_image(pages_stream[i])
set_page_size(page_setup, page_image.width, page_image.height)
page_setup.top_margin = 0
page_setup.left_margin = 0
page_setup.bottom_margin = 0
page_setup.right_margin = 0
if i != len(pages_stream) - 1:
builder.insert_break(aw.BreakType.SECTION_BREAK_NEW_PAGE)
save_options = aw.saving.PdfSaveOptions()
save_options.cache_background_graphics = true
builder.document.save("Output.docx", save_options)
def set_page_size(page_setup, width, height):
page_setup.page_width = width;
page_setup.page_height = height;
We host our Python packages in PyPi repositories. Please follow the step-by-step instructions on how to install "Aspose.Words for Python via .NET" to your developer environment.
This package is compatible with Python ≥3.5 and <3.12. If you develop software for Linux, please have a look at additional requirements for gcc and libpython in Product Documentation.