I’m working on extracting data from a Notion database table using Selenium, but I’m running into issues with rows that aren’t initially visible on the page.
My current workflow is:
- Load the Notion site
- Navigate through each table row
- Hover over the row to reveal the “Open” button
- Click to open the page in side view
- Extract the page content
- Move to the next row and repeat
The main issue is that only 26-28 rows are visible when the page loads, but my table has 47 total rows. Even after scrolling down, my script can’t detect more than 28 rows.
Here’s my function for processing individual rows:
def _scroll_last_rendered_row_into_view(
    browser: webdriver.Chrome, rows_xpath: str, row_index: int
) -> None:
    """
    Scrolls the last rendered row into view when ``row_index`` is not rendered yet.

    Does nothing when no rows match ``rows_xpath`` or when at least
    ``row_index`` elements already match it. Any error is printed and
    swallowed, because this is only a best-effort nudge before the real wait.
    """
    try:
        rendered_rows = browser.find_elements(By.XPATH, rows_xpath)
        # NOTE(review): assumes the last step of rows_xpath matches only the
        # table's row containers, so the match count equals the number of
        # rendered rows -- confirm against the live DOM.
        if rendered_rows and len(rendered_rows) < row_index:
            browser.execute_script(
                "arguments[0].scrollIntoView({block: 'end'});",
                rendered_rows[-1],
            )
    except Exception as error:
        print(f"Could not scroll the last rendered row into view: {error}")


def process_table_row(browser: webdriver.Chrome, row_index: int) -> str:
    """
    Processes a single table row and extracts its content.

    Locates the row by its position in the table, hovers over it, clicks the
    "Open in side peek" button and returns the text of the opened page.

    If fewer rows are rendered than ``row_index``, the last rendered row is
    scrolled into view before waiting for the target row, and the lookup is
    retried a few times. This gives rows that the page only renders on scroll
    a chance to appear.

    NOTE(review): the row is addressed by a positional XPath predicate. If the
    page removes earlier rows from the DOM while scrolling, positions no
    longer correspond to table rows and this lookup will still fail -- verify
    how the table renders.

    Args:
        browser: Selenium Chrome driver used for all lookups and actions.
        row_index: 1-based position of the row (XPath positional predicate).

    Returns:
        The text of the first element with class ``notion-page-content``, or
        an empty string if the row, the side peek button or the content could
        not be obtained. Those failures are printed rather than raised.
    """
    wait_timeout = 10  # seconds for each explicit wait
    row_lookup_rounds = 3  # how often the row lookup is tried in total
    scroll_attempts = 8

    print(f"Working on row {row_index}...")
    # XPath of all row containers; the target row is one of them, selected by
    # position, followed by the path down to the element that gets hovered.
    rows_xpath = (
        "//*[@id='notion-app']/div/div[1]/div/div[1]/main/div/div/div[3]"
        "/div[2]/div/div/div/div[3]/div[2]/div"
    )
    row_xpath = f"{rows_xpath}[{row_index}]/div/div[1]/div/div[2]/div/div"

    print(f"Finding row {row_index}...")
    row_element = None
    lookup_error = None
    for _ in range(row_lookup_rounds):
        # Prompt the page to render further rows before waiting for ours.
        _scroll_last_rendered_row_into_view(browser, rows_xpath, row_index)
        try:
            row_element = WebDriverWait(browser, wait_timeout).until(
                EC.presence_of_element_located((By.XPATH, row_xpath))
            )
            break
        except Exception as error:
            lookup_error = error
    if row_element is None:
        print(f"Could not find row {row_index}: {lookup_error}")
        return ""
    print(f"Row {row_index} found.")

    # scroll container for rows beyond 16
    if row_index > 16:
        for attempt in range(scroll_attempts):
            try:
                scroll_container_down(browser, row_element, 50)
            except Exception as error:
                print(f"Scrolling attempt {attempt + 1} failed: {error}")
            else:
                print(f"Scrolled to row {row_index}.")
                break
        else:
            # Every attempt raised; carry on, since the hover below may still work.
            print(f"All scrolling attempts failed for row {row_index}.")

    # hover over the row so that its "Open" button is shown
    move_to_element(browser, row_element)

    # find and click the side peek button
    print(f"Looking for side peek button on row {row_index}...")
    try:
        # The lookup is page-wide, not scoped to the row; it relies on the
        # hover above making this row's button the clickable one.
        peek_button = WebDriverWait(browser, wait_timeout).until(
            EC.element_to_be_clickable(
                (By.XPATH, "//div[@aria-label='Open in side peek']")
            )
        )
        print(f"Clicking side peek for row {row_index}...")
        peek_button.click()
    except Exception as error:
        print(f"Side peek button not found for row {row_index}: {error}")
        return ""

    # Fixed pause before reading; presumably gives the side peek time to show
    # the newly opened page instead of a previous one -- TODO confirm.
    time.sleep(3)

    # get the page content
    print(f"Getting content from row {row_index}...")
    try:
        page_content = WebDriverWait(browser, wait_timeout).until(
            EC.presence_of_element_located(
                (By.CLASS_NAME, "notion-page-content")
            )
        )
        extracted_text = page_content.text
        print(f"Content extracted from row {row_index}.")
        return extracted_text
    except Exception as error:
        print(f"Failed to extract content from row {row_index}: {error}")
        return ""
And here’s how I count the total rows:
def count_table_rows(browser: webdriver.Chrome, table_selector: str) -> int:
    """
    Returns the number of elements that match the row selector right now.

    Only elements found by the driver at the time of the call are counted;
    the function itself does no scrolling or waiting.

    Args:
        browser: Selenium Chrome driver to query.
        table_selector: XPath expression that matches the table's rows.

    Returns:
        How many elements matched ``table_selector``.
    """
    print("Counting table rows...")
    total = len(browser.find_elements(By.XPATH, table_selector))
    print(f"Found {total} rows in table")
    return total
I think the problem is that rows aren’t being detected in the first place. This works fine for small tables but I need to handle tables with 400+ rows. Any suggestions on how to make Selenium detect all rows in a Notion table?