159 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			159 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
"""
 | 
						|
Read SAS sas7bdat or xport files.
 | 
						|
"""
 | 
						|
from __future__ import annotations
 | 
						|
 | 
						|
from abc import (
 | 
						|
    ABCMeta,
 | 
						|
    abstractmethod,
 | 
						|
)
 | 
						|
from typing import (
 | 
						|
    TYPE_CHECKING,
 | 
						|
    Hashable,
 | 
						|
    overload,
 | 
						|
)
 | 
						|
 | 
						|
from pandas._typing import (
 | 
						|
    FilePath,
 | 
						|
    ReadBuffer,
 | 
						|
)
 | 
						|
 | 
						|
from pandas.io.common import stringify_path
 | 
						|
 | 
						|
if TYPE_CHECKING:
 | 
						|
    from pandas import DataFrame
 | 
						|
 | 
						|
 | 
						|
# TODO(PY38): replace with Protocol in Python 3.8
 | 
						|
class ReaderBase(metaclass=ABCMeta):
    """
    Abstract interface shared by the XportReader and SAS7BDATReader classes.

    Concrete readers must implement ``read`` and ``close``. In exchange they
    can be used in a ``with`` statement: entering yields the reader itself
    and leaving the block calls ``close``.
    """

    @abstractmethod
    def read(self, nrows=None):
        """
        Read data from the underlying SAS file.

        Must be overridden by subclasses. ``nrows`` defaults to None; its
        exact interpretation is left to the concrete reader.
        """

    @abstractmethod
    def close(self):
        """
        Release whatever resources the reader holds.

        Must be overridden by subclasses.
        """

    def __enter__(self):
        """Enter the runtime context and hand back this reader."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Close the reader when the ``with`` block ends.

        Nothing is returned, so an exception raised inside the block is not
        suppressed.
        """
        self.close()
 | 
						|
 | 
						|
 | 
						|
# Typing-only signature: an integer ``chunksize`` means the caller receives a
# reader object rather than a DataFrame.
# NOTE(review): ``chunksize`` carries a default here, so a call that omits it
# may also be matched against this signature first by a type checker --
# confirm this is the intended resolution order.
@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: int = ...,
    iterator: bool = ...,
) -> ReaderBase: ...
 | 
						|
 | 
						|
 | 
						|
# Typing-only signature: with ``chunksize=None`` the result is either a
# DataFrame or a reader object (the implementation returns the reader when
# ``iterator`` is truthy).
@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: None = ...,
    iterator: bool = ...,
) -> DataFrame | ReaderBase: ...
 | 
						|
 | 
						|
 | 
						|
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    format: str | None = None,
    index: Hashable | None = None,
    encoding: str | None = None,
    chunksize: int | None = None,
    iterator: bool = False,
) -> DataFrame | ReaderBase:
    """
    Read SAS files stored as either XPORT or SAS7BDAT format files.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.sas``.
    format : str {'xport', 'sas7bdat'} or None
        If None, file format is inferred from the file extension (``.xpt`` or
        ``.sas7bdat``, compared case-insensitively). If 'xport' or 'sas7bdat',
        uses the corresponding format; the value itself is also matched
        case-insensitively.
    index : identifier of index column, defaults to None
        Identifier of column that should be used as index of the DataFrame.
    encoding : str, default is None
        Encoding for text data.  If None, text data are stored as raw bytes.
    chunksize : int, defaults to None
        Read file `chunksize` lines at a time, returns iterator.

        .. versionchanged:: 1.2

            The returned reader is a context manager.
    iterator : bool, defaults to False
        If True, returns an iterator for reading the file incrementally.

        .. versionchanged:: 1.2

            The returned reader is a context manager.

    Returns
    -------
    DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
    or XportReader

    Raises
    ------
    ValueError
        If ``format`` is None and ``filepath_or_buffer`` is not a string or
        path object, if the format cannot be inferred from the file
        extension, or if ``format`` names an unknown SAS format.
    """
    if format is None:
        # The file extension is the only hint available, so the input has to
        # resolve to a string; an open buffer gives nothing to inspect.
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if not isinstance(filepath_or_buffer, str):
            raise ValueError(
                "If this is a buffer object rather "
                "than a string name, you must specify a format string"
            )

        lowered_name = filepath_or_buffer.lower()
        for suffix, inferred in ((".xpt", "xport"), (".sas7bdat", "sas7bdat")):
            if lowered_name.endswith(suffix):
                format = inferred
                break
        else:
            raise ValueError("unable to infer format of SAS file")

    requested = format.lower()

    # Reader modules are imported lazily, only for the format actually used.
    reader: ReaderBase
    if requested == "sas7bdat":
        from pandas.io.sas.sas7bdat import SAS7BDATReader

        reader = SAS7BDATReader(
            filepath_or_buffer,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
        )
    elif requested == "xport":
        from pandas.io.sas.sas_xport import XportReader

        reader = XportReader(
            filepath_or_buffer,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
        )
    else:
        raise ValueError("unknown SAS format")

    if not (iterator or chunksize):
        # One-shot read: the context manager closes the reader once the
        # data has been read.
        with reader:
            return reader.read()

    # Incremental reading was requested; the caller owns (and must close)
    # the reader.
    return reader
 |