Building hierarchical numbered lists in Python-Docx compatible with Google Docs upload

I’m trying to build nested numbered lists with python-docx that work properly when uploaded to Google Docs. Single-level lists work fine, but multi-level lists cause issues. The numbering gets messed up with some items showing no numbers and others showing wrong values.

When I upload the generated docx file to Google Docs, it gets worse. The main items keep their numbers but sub-items lose their numbering completely. I tested by creating a similar document directly in MS Word and uploading it to Google Docs, and that works perfectly. So the issue is definitely in how I’m creating the list structure with python-docx.

Here’s my test code that shows the problem:

from docx import Document
from docx.shared import Inches
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

INDENT_SIZE = 0.4
MAX_DEPTH = 6.0
current_item = None

def apply_numbering(document, para=None, previous=None, depth=None):
    def find_next_abstract_id(numbering_part):
        abstracts = numbering_part.findall(qn('w:abstractNum'))
        used_ids = [int(abstract.get(qn('w:abstractNumId'))) for abstract in abstracts]
        return max(used_ids) + 1 if used_ids else 0

    def find_next_number_id(numbering_part):
        numbers = numbering_part.findall(qn('w:num'))
        used_ids = [int(number.get(qn('w:numId'))) for number in numbers]
        return max(used_ids) + 1 if used_ids else 0

    def build_abstract_numbering(numbering_part, depth_level):
        new_abstract_id = find_next_abstract_id(numbering_part)
        abstract_element = OxmlElement('w:abstractNum')
        abstract_element.set(qn('w:abstractNumId'), str(new_abstract_id))

        level_element = OxmlElement('w:lvl')
        level_element.set(qn('w:ilvl'), str(depth_level))

        start_element = OxmlElement('w:start')
        start_element.set(qn('w:val'), '1')
        level_element.append(start_element)

        format_element = OxmlElement('w:numFmt')
        format_element.set(qn('w:val'), 'decimal')
        level_element.append(format_element)

        text_element = OxmlElement('w:lvlText')
        text_element.set(qn('w:val'), '%1.')
        level_element.append(text_element)

        justify_element = OxmlElement('w:lvlJc')
        justify_element.set(qn('w:val'), 'left')
        level_element.append(justify_element)

        style_element = OxmlElement('w:pStyle')
        style_element.set(qn('w:val'), 'LIST_NUMBER')
        level_element.append(style_element)

        abstract_element.append(level_element)
        numbering_part.append(abstract_element)
        return new_abstract_id

    def build_number_instance(numbering_part, abstract_id):
        new_number_id = find_next_number_id(numbering_part)
        number_element = OxmlElement('w:num')
        number_element.set(qn('w:numId'), str(new_number_id))

        reference_element = OxmlElement('w:abstractNumId')
        reference_element.set(qn('w:val'), str(abstract_id))
        number_element.append(reference_element)

        numbering_part.append(number_element)
        return new_number_id

    numbering_definitions = document.part.numbering_part.numbering_definitions._numbering

    if previous is None or previous._p.pPr is None or previous._p.pPr.numPr is None or previous._p.pPr.numPr.numId is None:
        depth_level = 0 if depth is None else depth
        abstract_id = build_abstract_numbering(numbering_definitions, depth_level)
        number_id = build_number_instance(numbering_definitions, abstract_id)
    else:
        depth_level = previous._p.pPr.numPr.ilvl.val if depth is None else depth
        number_id = previous._p.pPr.numPr.numId.val

    para._p.get_or_add_pPr().get_or_add_numPr().get_or_add_numId().val = number_id
    para._p.get_or_add_pPr().get_or_add_numPr().get_or_add_ilvl().val = depth_level

def create_list_item(document, content, depth, style_name):
    global current_item
    new_para = document.add_paragraph(content, style=style_name)
    new_para.paragraph_format.left_indent = Inches(min(depth * INDENT_SIZE, MAX_DEPTH))
    new_para.paragraph_format.line_spacing = 1

    if style_name == 'List Number':
        apply_numbering(document=document, para=new_para, previous=current_item, depth=depth)
        current_item = new_para

doc = Document()
doc.add_heading('Section A')
create_list_item(doc=doc, content='Main point one', depth=0, style_name='List Number')
create_list_item(doc=doc, content='Main point two', depth=0, style_name='List Number')
create_list_item(doc=doc, content='Main point three', depth=0, style_name='List Number')

current_item = None
doc.add_heading('Section B')
create_list_item(doc=doc, content='Primary item', depth=0, style_name='List Number')
create_list_item(doc=doc, content='Sub item alpha', depth=1, style_name='List Number')
create_list_item(doc=doc, content='Sub item beta', depth=1, style_name='List Number')
create_list_item(doc=doc, content='Secondary item', depth=0, style_name='List Number')
create_list_item(doc=doc, content='Final item', depth=0, style_name='List Number')
create_list_item(doc=doc, content='Final sub item', depth=1, style_name='List Bullet')
create_list_item(doc=doc, content='Last item', depth=0, style_name='List Number')

doc.save('multilevel_test.docx')

The document looks correct in Word but when I upload it to Google Docs, the nested items lose their numbers. I think I’m missing something about how the numbering XML should be structured for multiple levels. How can I fix this so the nested numbering works properly in both Word and Google Docs?

Google Docs is super picky about docx numbering XML. Try making separate abstractNum definitions for each level instead of cramming multiple lvl elements into one. Also double-check your numFmt values for different depths - Google Docs expects specific formatting patterns or it’ll mess up your hierarchy when importing.

Had this exact issue last year. The problem’s with how abstractNum elements handle multiple levels. You’re creating one abstractNum per depth level, but Google Docs wants a single abstractNum with all lvl elements defined upfront.

Here’s the fix: create one comprehensive abstractNum that includes all your levels (0 through whatever max depth you need). Then reference that same abstractNum across all paragraphs and just vary the ilvl values.

In your build_abstract_numbering function, don’t create separate abstracts for each level. Instead, make one abstract with multiple lvl children where each has different ilvl, numFmt, and lvlText values. Google Docs will understand the complete hierarchy structure during import.

Your problem is missing pPr properties that Google Docs needs for list inheritance. I’ve hit this same issue before - Google Docs wants explicit hanging indent settings and proper numFmt patterns for each level. You’re missing hanging indents in your level elements. Add pPr with hanging indent properties to each lvl element - try 0.25 inches hanging with 0.5 inch left indent for level 0, then bump it up for deeper levels. Fix your lvlText patterns too. Don’t use ‘%1.’ everywhere - use ‘%1.’ for level 0, ‘%1.%2.’ for level 1, and so on. This shows Google Docs the complete numbering path. Make sure you’re setting correct numFmt values for each level. Google Docs expects ‘decimal’ for most levels but gets picky about mixed formats in the same abstractNum structure.

This topic was automatically closed 4 days after the last reply. New replies are no longer allowed.