diff --git a/docs/source/api/dataframe.rst b/docs/source/api/dataframe.rst index a9e9e47c..6fecbce8 100644 --- a/docs/source/api/dataframe.rst +++ b/docs/source/api/dataframe.rst @@ -150,238 +150,3 @@ To materialize the results of your DataFrame operations: # Count rows count = df.count() -HTML Rendering in Jupyter -------------------------- - -When working in Jupyter notebooks or other environments that support rich HTML display, -DataFusion DataFrames automatically render as nicely formatted HTML tables. This functionality -is provided by the ``_repr_html_`` method, which is automatically called by Jupyter. - -Basic HTML Rendering -~~~~~~~~~~~~~~~~~~~~ - -In a Jupyter environment, simply displaying a DataFrame object will trigger HTML rendering: - -.. code-block:: python - - # Will display as HTML table in Jupyter - df - - # Explicit display also uses HTML rendering - display(df) - -HTML Rendering Customization ----------------------------- - -DataFusion provides extensive customization options for HTML table rendering through the -``datafusion.html_formatter`` module. - -Configuring the HTML Formatter -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can customize how DataFrames are rendered by configuring the formatter: - -.. 
code-block:: python - - from datafusion.html_formatter import configure_formatter - - configure_formatter( - max_cell_length=30, # Maximum length of cell content before truncation - max_width=800, # Maximum width of table in pixels - max_height=400, # Maximum height of table in pixels - max_memory_bytes=2 * 1024 * 1024,# Maximum memory used for rendering (2MB) - min_rows_display=10, # Minimum rows to display - repr_rows=20, # Number of rows to display in representation - enable_cell_expansion=True, # Allow cells to be expandable on click - custom_css=None, # Custom CSS to apply - show_truncation_message=True, # Show message when data is truncated - style_provider=None, # Custom style provider class - use_shared_styles=True # Share styles across tables to reduce duplication - ) - -Custom Style Providers -~~~~~~~~~~~~~~~~~~~~~~ - -For advanced styling needs, you can create a custom style provider class: - -.. code-block:: python - - from datafusion.html_formatter import configure_formatter - - class CustomStyleProvider: - def get_cell_style(self) -> str: - return "background-color: #f5f5f5; color: #333; padding: 8px; border: 1px solid #ddd;" - - def get_header_style(self) -> str: - return "background-color: #4285f4; color: white; font-weight: bold; padding: 10px;" - - # Apply custom styling - configure_formatter(style_provider=CustomStyleProvider()) - -Custom Type Formatters -~~~~~~~~~~~~~~~~~~~~~~ - -You can register custom formatters for specific data types: - -.. code-block:: python - - from datafusion.html_formatter import get_formatter - - formatter = get_formatter() - - # Format integers with color based on value - def format_int(value): - return f' 100 else "blue"}">{value}' - - formatter.register_formatter(int, format_int) - - # Format date values - def format_date(value): - return f'{value.isoformat()}' - - formatter.register_formatter(datetime.date, format_date) - -Custom Cell Builders -~~~~~~~~~~~~~~~~~~~~ - -For complete control over cell rendering: - -.. 
code-block:: python - - formatter = get_formatter() - - def custom_cell_builder(value, row, col, table_id): - try: - num_value = float(value) - if num_value > 0: # Positive values get green - return f'{value}' - if num_value < 0: # Negative values get red - return f'{value}' - except (ValueError, TypeError): - pass - - # Default styling for non-numeric or zero values - return f'{value}' - - formatter.set_custom_cell_builder(custom_cell_builder) - -Custom Header Builders -~~~~~~~~~~~~~~~~~~~~~~ - -Similarly, you can customize the rendering of table headers: - -.. code-block:: python - - def custom_header_builder(field): - tooltip = f"Type: {field.type}" - return f'{field.name}' - - formatter.set_custom_header_builder(custom_header_builder) - -Managing Formatter State ------------------------~ - -The HTML formatter maintains global state that can be managed: - -.. code-block:: python - - from datafusion.html_formatter import reset_formatter, reset_styles_loaded_state, get_formatter - - # Reset the formatter to default settings - reset_formatter() - - # Reset only the styles loaded state (useful when styles were loaded but need reloading) - reset_styles_loaded_state() - - # Get the current formatter instance to make changes - formatter = get_formatter() - -Advanced Example: Dashboard-Style Formatting -------------------------------------------~~ - -This example shows how to create a dashboard-like styling for your DataFrames: - -.. 
code-block:: python - - from datafusion.html_formatter import configure_formatter, get_formatter - - # Define custom CSS - custom_css = """ - .datafusion-table { - font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; - border-collapse: collapse; - width: 100%; - box-shadow: 0 2px 3px rgba(0,0,0,0.1); - } - .datafusion-table th { - position: sticky; - top: 0; - z-index: 10; - } - .datafusion-table tr:hover td { - background-color: #f1f7fa !important; - } - .datafusion-table .numeric-positive { - color: #0a7c00; - } - .datafusion-table .numeric-negative { - color: #d13438; - } - """ - - class DashboardStyleProvider: - def get_cell_style(self) -> str: - return "padding: 8px 12px; border-bottom: 1px solid #e0e0e0;" - - def get_header_style(self) -> str: - return ("background-color: #0078d4; color: white; font-weight: 600; " - "padding: 12px; text-align: left; border-bottom: 2px solid #005a9e;") - - # Apply configuration - configure_formatter( - max_height=500, - enable_cell_expansion=True, - custom_css=custom_css, - style_provider=DashboardStyleProvider(), - max_cell_length=50 - ) - - # Add custom formatters for numbers - formatter = get_formatter() - - def format_number(value): - try: - num = float(value) - cls = "numeric-positive" if num > 0 else "numeric-negative" if num < 0 else "" - return f'{value:,}' if cls else f'{value:,}' - except (ValueError, TypeError): - return str(value) - - formatter.register_formatter(int, format_number) - formatter.register_formatter(float, format_number) - -Best Practices --------------- - -1. **Memory Management**: For large datasets, use ``max_memory_bytes`` to limit memory usage. - -2. **Responsive Design**: Set reasonable ``max_width`` and ``max_height`` values to ensure tables display well on different screens. - -3. **Style Optimization**: Use ``use_shared_styles=True`` to avoid duplicate style definitions when displaying multiple tables. - -4. 
**Reset When Needed**: Call ``reset_formatter()`` when you want to start fresh with default settings. - -5. **Cell Expansion**: Use ``enable_cell_expansion=True`` when cells might contain longer content that users may want to see in full. - -Additional Resources --------------------- - -* :doc:`../user-guide/dataframe` - Complete guide to using DataFrames -* :doc:`../user-guide/io/index` - I/O Guide for reading data from various sources -* :doc:`../user-guide/data-sources` - Comprehensive data sources guide -* :ref:`io_csv` - CSV file reading -* :ref:`io_parquet` - Parquet file reading -* :ref:`io_json` - JSON file reading -* :ref:`io_avro` - Avro file reading -* :ref:`io_custom_table_provider` - Custom table providers -* `API Reference `_ - Full API reference diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst index 7f58227c..131b97e0 100644 --- a/docs/source/api/index.rst +++ b/docs/source/api/index.rst @@ -23,5 +23,3 @@ This section provides detailed API documentation for the DataFusion Python libra .. toctree:: :maxdepth: 2 - - dataframe diff --git a/docs/source/index.rst b/docs/source/index.rst index ff1e4728..9b3a2f7f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -72,7 +72,7 @@ Example user-guide/introduction user-guide/basics user-guide/data-sources - user-guide/dataframe + user-guide/dataframe/index user-guide/common-operations/index user-guide/io/index user-guide/configuration @@ -88,10 +88,3 @@ Example contributor-guide/introduction contributor-guide/ffi -.. _toc.api: -.. 
toctree:: - :hidden: - :maxdepth: 1 - :caption: API - - api/index diff --git a/docs/source/user-guide/dataframe.rst b/docs/source/user-guide/dataframe/index.rst similarity index 99% rename from docs/source/user-guide/dataframe.rst rename to docs/source/user-guide/dataframe/index.rst index 23c65b5f..78569f9f 100644 --- a/docs/source/user-guide/dataframe.rst +++ b/docs/source/user-guide/dataframe/index.rst @@ -215,4 +215,9 @@ You can control how much data is displayed and how much memory is used for rende repr_rows=20 # Show 20 rows in __repr__ output ) -These parameters help balance comprehensive data display against performance considerations. \ No newline at end of file +These parameters help balance comprehensive data display against performance considerations. + +.. toctree:: + :maxdepth: 1 + + rendering diff --git a/docs/source/user-guide/dataframe/rendering.rst b/docs/source/user-guide/dataframe/rendering.rst new file mode 100644 index 00000000..8a97d6f1 --- /dev/null +++ b/docs/source/user-guide/dataframe/rendering.rst @@ -0,0 +1,235 @@ +HTML Rendering in Jupyter +------------------------- + +When working in Jupyter notebooks or other environments that support rich HTML display, +DataFusion DataFrames automatically render as nicely formatted HTML tables. This functionality +is provided by the ``_repr_html_`` method, which is automatically called by Jupyter. + +Basic HTML Rendering +~~~~~~~~~~~~~~~~~~~~ + +In a Jupyter environment, simply displaying a DataFrame object will trigger HTML rendering: + +.. code-block:: python + + # Will display as HTML table in Jupyter + df + + # Explicit display also uses HTML rendering + display(df) + +HTML Rendering Customization +---------------------------- + +DataFusion provides extensive customization options for HTML table rendering through the +``datafusion.html_formatter`` module. 
+ +Configuring the HTML Formatter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can customize how DataFrames are rendered by configuring the formatter: + +.. code-block:: python + + from datafusion.html_formatter import configure_formatter + + configure_formatter( + max_cell_length=30, # Maximum length of cell content before truncation + max_width=800, # Maximum width of table in pixels + max_height=400, # Maximum height of table in pixels + max_memory_bytes=2 * 1024 * 1024,# Maximum memory used for rendering (2MB) + min_rows_display=10, # Minimum rows to display + repr_rows=20, # Number of rows to display in representation + enable_cell_expansion=True, # Allow cells to be expandable on click + custom_css=None, # Custom CSS to apply + show_truncation_message=True, # Show message when data is truncated + style_provider=None, # Custom style provider class + use_shared_styles=True # Share styles across tables to reduce duplication + ) + +Custom Style Providers +~~~~~~~~~~~~~~~~~~~~~~ + +For advanced styling needs, you can create a custom style provider class: + +.. code-block:: python + + from datafusion.html_formatter import configure_formatter + + class CustomStyleProvider: + def get_cell_style(self) -> str: + return "background-color: #f5f5f5; color: #333; padding: 8px; border: 1px solid #ddd;" + + def get_header_style(self) -> str: + return "background-color: #4285f4; color: white; font-weight: bold; padding: 10px;" + + # Apply custom styling + configure_formatter(style_provider=CustomStyleProvider()) + +Custom Type Formatters +~~~~~~~~~~~~~~~~~~~~~~ + +You can register custom formatters for specific data types: + +.. 
code-block:: python + + from datafusion.html_formatter import get_formatter + + formatter = get_formatter() + + # Format integers with color based on value + def format_int(value): + return f'<span style="color: {"red" if value > 100 else "blue"}">{value}</span>' + + formatter.register_formatter(int, format_int) + + # Format date values + def format_date(value): + return f'{value.isoformat()}' + + formatter.register_formatter(datetime.date, format_date) + +Custom Cell Builders +~~~~~~~~~~~~~~~~~~~~ + +For complete control over cell rendering: + +.. code-block:: python + + formatter = get_formatter() + + def custom_cell_builder(value, row, col, table_id): + try: + num_value = float(value) + if num_value > 0: # Positive values get green + return f'{value}' + if num_value < 0: # Negative values get red + return f'{value}' + except (ValueError, TypeError): + pass + + # Default styling for non-numeric or zero values + return f'{value}' + + formatter.set_custom_cell_builder(custom_cell_builder) + +Custom Header Builders +~~~~~~~~~~~~~~~~~~~~~~ + +Similarly, you can customize the rendering of table headers: + +.. code-block:: python + + def custom_header_builder(field): + tooltip = f"Type: {field.type}" + return f'{field.name}' + + formatter.set_custom_header_builder(custom_header_builder) + +Managing Formatter State +------------------------ + +The HTML formatter maintains global state that can be managed: + +.. code-block:: python + + from datafusion.html_formatter import reset_formatter, reset_styles_loaded_state, get_formatter + + # Reset the formatter to default settings + reset_formatter() + + # Reset only the styles loaded state (useful when styles were loaded but need reloading) + reset_styles_loaded_state() + + # Get the current formatter instance to make changes + formatter = get_formatter() + +Advanced Example: Dashboard-Style Formatting +-------------------------------------------- + +This example shows how to create a dashboard-like styling for your DataFrames: + +.. 
code-block:: python + + from datafusion.html_formatter import configure_formatter, get_formatter + + # Define custom CSS + custom_css = """ + .datafusion-table { + font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; + border-collapse: collapse; + width: 100%; + box-shadow: 0 2px 3px rgba(0,0,0,0.1); + } + .datafusion-table th { + position: sticky; + top: 0; + z-index: 10; + } + .datafusion-table tr:hover td { + background-color: #f1f7fa !important; + } + .datafusion-table .numeric-positive { + color: #0a7c00; + } + .datafusion-table .numeric-negative { + color: #d13438; + } + """ + + class DashboardStyleProvider: + def get_cell_style(self) -> str: + return "padding: 8px 12px; border-bottom: 1px solid #e0e0e0;" + + def get_header_style(self) -> str: + return ("background-color: #0078d4; color: white; font-weight: 600; " + "padding: 12px; text-align: left; border-bottom: 2px solid #005a9e;") + + # Apply configuration + configure_formatter( + max_height=500, + enable_cell_expansion=True, + custom_css=custom_css, + style_provider=DashboardStyleProvider(), + max_cell_length=50 + ) + + # Add custom formatters for numbers + formatter = get_formatter() + + def format_number(value): + try: + num = float(value) + cls = "numeric-positive" if num > 0 else "numeric-negative" if num < 0 else "" + return f'<span class="{cls}">{value:,}</span>' if cls else f'{value:,}' + except (ValueError, TypeError): + return str(value) + + formatter.register_formatter(int, format_number) + formatter.register_formatter(float, format_number) + +Best Practices +-------------- + +1. **Memory Management**: For large datasets, use ``max_memory_bytes`` to limit memory usage. + +2. **Responsive Design**: Set reasonable ``max_width`` and ``max_height`` values to ensure tables display well on different screens. + +3. **Style Optimization**: Use ``use_shared_styles=True`` to avoid duplicate style definitions when displaying multiple tables. + +4. 
**Reset When Needed**: Call ``reset_formatter()`` when you want to start fresh with default settings. + +5. **Cell Expansion**: Use ``enable_cell_expansion=True`` when cells might contain longer content that users may want to see in full. + +Additional Resources +-------------------- + +* :doc:`index` - Complete guide to using DataFrames +* :doc:`../io/index` - I/O Guide for reading data from various sources +* :doc:`../data-sources` - Comprehensive data sources guide +* :ref:`io_csv` - CSV file reading +* :ref:`io_parquet` - Parquet file reading +* :ref:`io_json` - JSON file reading +* :ref:`io_avro` - Avro file reading +* :ref:`io_custom_table_provider` - Custom table providers +* `API Reference `_ - Full API reference