I am currently using python-docx to create documents that feature multilevel numbered lists. While single-level lists work without issues, I face significant challenges when I try to incorporate nested lists.
The main problem arises with the nested items, where their numbering appears broken. For some entries, the sub-items have no numbers, while for others, they show incorrect values like simply “1” instead of the expected hierarchical format.
This situation worsens after I upload the document to Google Docs. Even if the Word file appears correct, the sub-items lose their numbering after the upload.
To troubleshoot, I created a similar document directly in Microsoft Word, and the upload to Google Docs went smoothly, indicating that the issue lies within how I’m generating the lists using python-docx.
Here’s a code snippet illustrating my approach and the complications I encounter:
from docx import Document
from docx.shared import Inches
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
INDENT_SIZE = 0.4
MAX_DEPTH = 6.0
current_item = None
def create_numbered_item(document, content=None, previous=None, depth=None):
def find_next_abstract_id(numbering_part):
abstract_elements = numbering_part.findall(qn('w:abstractNum'))
used_ids = [int(elem.get(qn('w:abstractNumId'))) for elem in abstract_elements]
return max(used_ids) + 1 if used_ids else 0
def find_next_num_id(numbering_part):
num_elements = numbering_part.findall(qn('w:num'))
used_ids = [int(elem.get(qn('w:numId'))) for elem in num_elements]
return max(used_ids) + 1 if used_ids else 0
def build_abstract_numbering(numbering_part, current_depth):
new_abstract_id = find_next_abstract_id(numbering_part)
abstract_element = OxmlElement('w:abstractNum')
abstract_element.set(qn('w:abstractNumId'), str(new_abstract_id))
level_element = OxmlElement('w:lvl')
level_element.set(qn('w:ilvl'), str(current_depth))
start_element = OxmlElement('w:start')
start_element.set(qn('w:val'), '1')
level_element.append(start_element)
format_element = OxmlElement('w:numFmt')
format_element.set(qn('w:val'), 'decimal')
level_element.append(format_element)
text_element = OxmlElement('w:lvlText')
text_element.set(qn('w:val'), '%1.')
level_element.append(text_element)
justify_element = OxmlElement('w:lvlJc')
justify_element.set(qn('w:val'), 'left')
level_element.append(justify_element)
abstract_element.append(level_element)
numbering_part.append(abstract_element)
return new_abstract_id
def build_numbering_instance(numbering_part, abstract_id):
new_num_id = find_next_num_id(numbering_part)
num_element = OxmlElement('w:num')
num_element.set(qn('w:numId'), str(new_num_id))
abstract_ref = OxmlElement('w:abstractNumId')
abstract_ref.set(qn('w:val'), str(abstract_id))
num_element.append(abstract_ref)
numbering_part.append(num_element)
return new_num_id
numbering_def = document.part.numbering_part.numbering_definitions._numbering
if previous is None or previous._p.pPr is None:
current_depth = 0 if depth is None else depth
abstract_id = build_abstract_numbering(numbering_def, current_depth)
numbering_id = build_numbering_instance(numbering_def, abstract_id)
else:
current_depth = previous._p.pPr.numPr.ilvl.val if depth is None else depth
numbering_id = previous._p.pPr.numPr.numId.val
content._p.get_or_add_pPr().get_or_add_numPr().get_or_add_numId().val = numbering_id
content._p.get_or_add_pPr().get_or_add_numPr().get_or_add_ilvl().val = current_depth
def insert_list_entry(document, text_content, level_depth, entry_style):
global current_item
new_paragraph = document.add_paragraph(text_content, style=entry_style)
new_paragraph.paragraph_format.left_indent = Inches(min(level_depth * INDENT_SIZE, MAX_DEPTH))
new_paragraph.paragraph_format.line_spacing = 1
if entry_style == 'List Number':
create_numbered_item(document=document, content=new_paragraph, previous=current_item, depth=level_depth)
current_item = new_paragraph
doc = Document()
doc.add_heading('Main Section')
insert_list_entry(doc=doc, text_content='Primary Entry', level_depth=0, entry_style='List Number')
insert_list_entry(doc=doc, text_content='Secondary Entry', level_depth=0, entry_style='List Number')
current_item = None
doc.add_heading('Another Section')
insert_list_entry(doc=doc, text_content='Main Point', level_depth=0, entry_style='List Number')
insert_list_entry(doc=doc, text_content='Sub Point A', level_depth=1, entry_style='List Number')
insert_list_entry(doc=doc, text_content='Sub Point B', level_depth=1, entry_style='List Number')
insert_list_entry(doc=doc, text_content='Another Main Point', level_depth=0, entry_style='List Number')
doc.save('nested_lists_output.docx')
It seems I might be overlooking an important aspect of the numbering structure in Word. After uploading to Google Docs, the nested items entirely lose their numbering. Does anyone have insights or solutions for this issue?