diff --git a/pandas/io/html.py b/pandas/io/html.py
index c4ffe332e3020..3193f52d239f1 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -8,7 +8,9 @@
import numbers
import os
import re
+from typing import Dict, List, Optional, Pattern, Sequence, Union
+from pandas._typing import FilePathOrBuffer
from pandas.compat._optional import import_optional_dependency
from pandas.errors import AbstractMethodError, EmptyDataError
from pandas.util._decorators import deprecate_nonkeyword_arguments
@@ -16,6 +18,7 @@
from pandas.core.dtypes.common import is_list_like
from pandas.core.construction import create_series_with_explicit_dtype
+from pandas.core.frame import DataFrame
from pandas.io.common import is_url, urlopen, validate_header_arg
from pandas.io.formats.printing import pprint_thing
@@ -924,22 +927,22 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
@deprecate_nonkeyword_arguments(version="2.0")
def read_html(
- io,
- match=".+",
- flavor=None,
- header=None,
- index_col=None,
- skiprows=None,
- attrs=None,
- parse_dates=False,
- thousands=",",
- encoding=None,
- decimal=".",
- converters=None,
+ io: FilePathOrBuffer,
+ match: Union[str, Pattern] = ".+",
+ flavor: Optional[str] = None,
+ header: Optional[Union[int, Sequence[int]]] = None,
+ index_col: Optional[Union[int, Sequence[int]]] = None,
+ skiprows: Optional[Union[int, Sequence[int], slice]] = None,
+ attrs: Optional[Dict[str, str]] = None,
+ parse_dates: bool = False,
+ thousands: Optional[str] = ",",
+ encoding: Optional[str] = None,
+ decimal: str = ".",
+ converters: Optional[Dict] = None,
na_values=None,
- keep_default_na=True,
- displayed_only=True,
-):
+ keep_default_na: bool = True,
+ displayed_only: bool = True,
+) -> List[DataFrame]:
r"""
Read HTML tables into a ``list`` of ``DataFrame`` objects.
@@ -958,26 +961,26 @@ def read_html(
This value is converted to a regular expression so that there is
consistent behavior between Beautiful Soup and lxml.
- flavor : str or None
+ flavor : str, optional
The parsing engine to use. 'bs4' and 'html5lib' are synonymous with
each other, they are both there for backwards compatibility. The
default of ``None`` tries to use ``lxml`` to parse and if that fails it
falls back on ``bs4`` + ``html5lib``.
- header : int or list-like or None, optional
+ header : int or list-like, optional
The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to
make the columns headers.
- index_col : int or list-like or None, optional
+ index_col : int or list-like, optional
The column (or list of columns) to use to create the index.
- skiprows : int or list-like or slice or None, optional
+ skiprows : int, list-like or slice, optional
Number of rows to skip after parsing the column integer. 0-based. If a
sequence of integers or a slice is given, will skip the rows indexed by
that sequence. Note that a single element sequence means 'skip the nth
row' whereas an integer means 'skip n rows'.
- attrs : dict or None, optional
+ attrs : dict, optional
This is a dictionary of attributes that you can pass to use to identify
the table in the HTML. These are not checked for validity before being
passed to lxml or Beautiful Soup. However, these attributes must be
@@ -1005,7 +1008,7 @@ def read_html(
thousands : str, optional
Separator to use to parse thousands. Defaults to ``','``.
- encoding : str or None, optional
+ encoding : str, optional
The encoding used to decode the web page. Defaults to ``None``.``None``
preserves the previous encoding behavior, which depends on the
underlying parser library (e.g., the parser library will try to use