import dataclasses
import itertools
import math
import pathlib
import typing
import string
from dataclasses import dataclass
import numpy as np
from loguru import logger
from openpyxl import load_workbook
from openpyxl.cell import Cell
from openpyxl.formatting.rule import FormulaRule
from openpyxl.styles import Font, PatternFill
from openpyxl.utils.cell import cols_from_range, coordinate_to_tuple, get_column_letter
from openpyxl.workbook import Workbook
from openpyxl.worksheet.errors import IgnoredError
[docs]
@dataclass
class SpreadsheetCell:
"""
A class to represent a cell in a spreadsheet.
Attributes
----------
column : int
The column number of the cell.
row : int
The row number of the cell.
value : object
The value contained in the cell.
Methods
-------
spreadsheet_cell() -> str
Returns the cell's address in A1 notation.
replace(**kwargs) -> 'SpreadsheetCell'
Returns a new SpreadsheetCell with updated attributes.
first_row(cell_range) -> tuple
Returns the first row of cells in the given range.
first_column(cell_range) -> tuple
Returns the first column of cells in the given range.
submatrix(cell_range) -> tuple
Returns the submatrix excluding the first row and column.
"""
column: int
row: int
value: object
[docs]
def spreadsheet_cell(self) -> str:
"""
Returns the cell's address in A1 notation.
Returns
-------
str
The cell's address in A1 notation.
"""
return f'{get_column_letter(self.column)}{self.row}'
[docs]
def replace(self, **kwargs) -> 'SpreadsheetCell':
"""
Returns a new SpreadsheetCell with updated attributes.
Parameters
----------
**kwargs : dict
The attributes to update.
Returns
-------
SpreadsheetCell
A new SpreadsheetCell with updated attributes.
"""
return dataclasses.replace(self, **kwargs)
[docs]
@classmethod
def first_row(cls, cell_range):
"""
Returns the first row of cells in the given range.
Parameters
----------
cell_range : str
The range of cells in A1 notation.
Returns
-------
tuple
A tuple of SpreadsheetCell objects representing the first row.
"""
table = list(cols_from_range(cell_range))
first_row = [coordinate_to_tuple(rows[0]) for rows in table]
return tuple(SpreadsheetCell(column=cell[1], row=cell[0], value=None) for cell in first_row)
[docs]
@classmethod
def first_column(cls, cell_range):
"""
Returns the first column of cells in the given range.
Parameters
----------
cell_range : str
The range of cells in A1 notation.
Returns
-------
tuple
A tuple of SpreadsheetCell objects representing the first column.
"""
table = list(cols_from_range(cell_range))
first_column = [coordinate_to_tuple(cell) for cell in table[0]]
return tuple(SpreadsheetCell(column=cell[1], row=cell[0], value=None) for cell in first_column)
[docs]
@classmethod
def submatrix(cls, cell_range):
"""
Returns the submatrix excluding the first row and column.
Parameters
----------
cell_range : str
The range of cells in A1 notation.
Returns
-------
tuple
A tuple of SpreadsheetCell objects representing the submatrix.
Examples
--------
the_range = "A1:C3"
submatrix = SpreadsheetCell.submatrix(the_range)
for cell in submatrix:
print(cell.spreadsheet_cell(), cell.column, cell.row)
B2 2 2
C2 3 2
B3 2 3
C3 3 3
"""
table = list(cols_from_range(cell_range))
first_column = [coordinate_to_tuple(cell) for cell in table[0]]
first_row = [coordinate_to_tuple(cols[0]) for cols in table]
box = []
for row in table:
for column in row:
row_idx, column_idx = coordinate_to_tuple(column)
if (row_idx, column_idx) not in first_column and (row_idx, column_idx) not in first_row:
box.append(SpreadsheetCell(column=column_idx, row=row_idx, value=None))
return tuple(sorted(box, key=lambda k: (k.row, k.column)))
[docs]
def iter_cells(first_column: str = 'E', left_padding: str = '') -> typing.Generator[str, None, None]:
""" Returns spreadsheet column names from A up to ZZ
Parameters:
- first_column Letter of the first column to return. Default (E)
- left_padding Padding added in front of single letter columns. Default empty
Returns:
- Generator supplying a column name
"""
if not first_column:
first_index = 0
elif first_column.upper() not in string.ascii_uppercase:
raise ValueError(f'Expected first_column {first_column} in {string.ascii_uppercase}')
elif len(first_column) != 1:
raise ValueError(f'Expected first_column of length 1 was: {len(first_column)}')
else:
first_index = string.ascii_uppercase.index(first_column.upper())
for cell in string.ascii_uppercase[first_index:]:
yield f'{left_padding}{cell}'
for a, b in itertools.product(string.ascii_uppercase, repeat=2):
yield a+b
[docs]
def find_max_column_width(col: typing.Tuple[Cell]):
max_length = 5
for cell in col:
try:
if cell.value is not None:
cell_length = len(str(cell.value)) + 1
if cell.data_type == 'n':
thousands = math.floor(math.log(1_000_000_0000, 1000))
cell_length = max(len(str(cell.value).split('.')[0]) + thousands, 2)
if cell_length > max_length:
max_length = cell_length
except (AttributeError, KeyError, IgnoredError, ValueError) as ex:
logger.debug(f'Got error f{cell.column_letter}')
logger.error(ex)
pass
return max_length
[docs]
def add_top_row_filter(workbook_file: pathlib.Path|str|None=None, workbook: Workbook| None=None, sheet_names: list[str] | None=None):
if not workbook_file and not workbook_file:
raise ValueError('add_top_row_filter require either workbook_file or workbook')
wb = workbook if workbook else load_workbook(workbook_file)
sheet_names = wb.sheetnames if not sheet_names else sheet_names
for worksheet in sheet_names:
ws = wb[worksheet]
top_row = f'A1:{get_column_letter(ws.max_column)}{1}'
ws.auto_filter.ref = top_row
if not workbook and workbook_file:
wb.save(workbook_file)
[docs]
def make_pretty(workbook_name: pathlib.Path|str):
wb = load_workbook(workbook_name)
header_font = Font(name='Source Sans Pro', size=11, bold=True, color="ffffff")
body_font = Font(name='Source Sans Pro', size=11, bold=False, color="000000")
for s in wb.sheetnames:
ws = wb[s]
# Freeze the top row
ws.freeze_panes = ws['A2']
# Define the fill color
header_fill = PatternFill(start_color='c8102e', end_color='c8102e', fill_type='solid')
odd_fill = PatternFill(start_color='ffd8de', end_color='ffd8de', fill_type='solid')
even_fill = PatternFill(start_color='ffebee', end_color='ffebee', fill_type='solid')
# Apply the fill color to the header row
for cell in ws[1]:
cell.fill = header_fill
cell.font = header_font
for row_number, row in enumerate(ws.rows):
if row_number == 0:
continue
for column_number, cell in enumerate(row):
cell.font = body_font
even_rule = FormulaRule(formula=['MOD(ROW(),2)=0'], fill=even_fill)
odd_rule = FormulaRule(formula=['MOD(ROW(),2)=1'], fill=odd_fill)
worksheet_range = f'A2:{get_column_letter(ws.max_column)}{ws.max_row}'
# logger.error(worksheet_range)
ws.conditional_formatting.add(worksheet_range, odd_rule)
ws.conditional_formatting.add(worksheet_range, even_rule)
for col in ws.iter_cols(min_col=0):
adjusted_width = find_max_column_width(col)
ws.column_dimensions[col[0].column_letter].width = adjusted_width + 1.5
# Skipping first row, assuming it is a header for now.
first_column_value = col[0].value
values = [int(r.value) for r in col[1:] if r.value and r.data_type == 'n']
if values:
max_min = max(values), min(values)
number_format = ''
if max_min[0] > 1000:
number_format = r'_-* #,##0_-;\-* #,##0_-;_-* "-"??_-;_-@_-'
elif max_min[0] <=1.0 and max_min[1] >= 0:
# number_format = '0%'
number_format = '0.000'
elif max_min[0] <=1.0 and max_min[1] >=-1.0:
number_format = '0.000'
if number_format:
for row_number, cell in enumerate(col):
if row_number < 1:
if cell.value == 'year':
break
continue
if True or cell.value:
if True or cell.data_type == 'n':
cell.number_format = number_format
else:
cell.number_format = "@"
wb.save(workbook_name)