@@ -1688,6 +1688,38 @@ def read(
16881688 columns : Sequence [str ] | None = None ,
16891689 order_categoricals : bool | None = None ,
16901690 ) -> DataFrame :
1691+ """
1692+ Read observations from a Stata file, converting them into a DataFrame.
1693+
1694+ Parameters
1695+ ----------
1696+ nrows : int, optional
1697+ Number of lines to read from the data file. If None, read the whole file.
1698+ convert_dates : bool, default True
1699+ Convert date variables to DataFrame time values.
1700+ convert_categoricals : bool, default True
1701+ Read value labels and convert columns to Categorical/Factor variables.
1702+ index_col : str, optional
1703+ Column to set as index.
1704+ convert_missing : bool, default False
1705+ Flag indicating whether to convert missing values to their Stata
1706+ representations. If False, missing values are replaced with nan.
1707+ If True, columns containing missing values are returned with
1708+ object data types and missing values are represented by
1709+ StataMissingValue objects.
1710+ preserve_dtypes : bool, default True
1711+ Preserve Stata datatypes. If False, numeric data are upcast to pandas
1712+ default types for foreign data (float64 or int64).
1713+ columns : list or None
1714+ Columns to retain. Columns will be returned in the given order. None
1715+ returns all columns.
1716+ order_categoricals : bool, default True
1717+ Flag indicating whether converted categorical data are ordered.
1718+
1719+ Returns
1720+ -------
1721+ DataFrame
1722+ """
16911723 self ._ensure_open ()
16921724
16931725 # Handle options
@@ -2149,6 +2181,116 @@ def read_stata(
21492181 compression : CompressionOptions = "infer" ,
21502182 storage_options : StorageOptions | None = None ,
21512183) -> DataFrame | StataReader :
2184+ """
2185+ Read Stata file into DataFrame.
2186+
2187+ Parameters
2188+ ----------
2189+ filepath_or_buffer : str, path object or file-like object
2190+ Any valid string path is acceptable. The string could be a URL. Valid
2191+ URL schemes include http, ftp, s3, and file. For file URLs, a host is
2192+ expected. A local file could be: ``file://localhost/path/to/table.dta``.
2193+
2194+ If you want to pass in a path object, pandas accepts any ``os.PathLike``.
2195+
2196+ By file-like object, we refer to objects with a ``read()`` method,
2197+ such as a file handle (e.g. via builtin ``open`` function)
2198+ or ``StringIO``.
2199+ convert_dates : bool, default True
2200+ Convert date variables to DataFrame time values.
2201+ convert_categoricals : bool, default True
2202+ Read value labels and convert columns to Categorical/Factor variables.
2203+ index_col : str, optional
2204+ Column to set as index.
2205+ convert_missing : bool, default False
2206+ Flag indicating whether to convert missing values to their Stata
2207+ representations. If False, missing values are replaced with nan.
2208+ If True, columns containing missing values are returned with
2209+ object data types and missing values are represented by
2210+ StataMissingValue objects.
2211+ preserve_dtypes : bool, default True
2212+ Preserve Stata datatypes. If False, numeric data are upcast to pandas
2213+ default types for foreign data (float64 or int64).
2214+ columns : list or None
2215+ Columns to retain. Columns will be returned in the given order. None
2216+ returns all columns.
2217+ order_categoricals : bool, default True
2218+ Flag indicating whether converted categorical data are ordered.
2219+ chunksize : int, default None
2220+ Return a StataReader object for iteration that yields chunks with
2221+ the given number of lines.
2222+ iterator : bool, default False
2223+ Return StataReader object.
2224+ compression : str or dict, default 'infer'
2225+ For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is
2226+ path-like, then detect compression from the following extensions: '.gz',
2227+ '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
2228+ (otherwise no compression).
2229+ If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in.
2230+ Set to ``None`` for no decompression.
2231+ Can also be a dict with key ``'method'`` set
2232+ to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
2233+ other key-value pairs are forwarded to
2234+ ``zipfile.ZipFile``, ``gzip.GzipFile``,
2235+ ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or
2236+ ``tarfile.TarFile``, respectively.
2237+ As an example, the following could be passed for Zstandard decompression using a
2238+ custom compression dictionary:
2239+ ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
2240+
2241+ .. versionadded:: 1.5.0
2242+ Added support for `.tar` files.
2243+ storage_options : dict, optional
2244+ Extra options that make sense for a particular storage connection, e.g.
2245+ host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
2246+ are forwarded to ``urllib.request.Request`` as header options. For other
2247+ URLs (e.g. starting with "s3://" or "gcs://") the key-value pairs are
2248+ forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
2249+ details, and for more examples on storage options refer `here
2250+ <https://pandas.pydata.org/docs/user_guide/io.html?
2251+ highlight=storage_options#reading-writing-remote-files>`_.
2252+
2253+ Returns
2254+ -------
2255+ DataFrame, pandas.api.typing.StataReader
2256+ If iterator or chunksize, returns StataReader, else DataFrame.
2257+
2258+ See Also
2259+ --------
2260+ io.stata.StataReader : Low-level reader for Stata data files.
2261+ DataFrame.to_stata : Export Stata data files.
2262+
2263+ Notes
2264+ -----
2265+ Categorical variables read through an iterator may not have the same
2266+ categories and dtype. This occurs when a variable stored in a DTA
2267+ file is associated to an incomplete set of value labels that only
2268+ label a strict subset of the values.
2269+
2270+ Examples
2271+ --------
2272+
2273+ Create a dummy Stata file for this example:
2274+
2275+ >>> df = pd.DataFrame({'animal': ['falcon', 'parrot', 'falcon', 'parrot'],
2276+ ... 'speed': [350, 18, 361, 15]}) # doctest: +SKIP
2277+ >>> df.to_stata('animals.dta') # doctest: +SKIP
2278+
2279+ Read a Stata dta file:
2280+
2281+ >>> df = pd.read_stata('animals.dta') # doctest: +SKIP
2282+
2283+ Read a Stata dta file in 10,000 line chunks:
2284+
2285+ >>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8") # doctest: +SKIP
2286+ >>> df = pd.DataFrame(values, columns=["i"]) # doctest: +SKIP
2287+ >>> df.to_stata('filename.dta') # doctest: +SKIP
2288+
2289+ >>> with pd.read_stata('filename.dta', chunksize=10000) as itr: # doctest: +SKIP
2290+ ...     for chunk in itr:
2291+ ...         # Operate on a single chunk, e.g., chunk.mean()
2292+ ...         pass # doctest: +SKIP
2293+ """
21522294 reader = StataReader (
21532295 filepath_or_buffer ,
21542296 convert_dates = convert_dates ,
0 commit comments