Inspecting SEG-Y

`TraceHeaders`

A convenience class for accessing and iterating over a SEG-Y file's trace headers. This class should be used with a context manager.

Examples:

>>> with TraceHeaders(segy_file, bytes_filter=bytes_filter, **segyio_kwargs) as headers:
        ntraces = headers.ntraces
        df = headers.to_dataframe(selection=slice(0, 100))

Source code in segysak/segy/_segy_headers.py

Python
class TraceHeaders:
    """A convenience class for accessing and iterating over a SEG-Y file's trace
    headers. This class should be used with a context manager.

    Examples:

        >>> with TraceHeaders(segy_file, bytes_filter=bytes_filter, **segyio_kwargs) as headers:
        ...     ntraces = headers.ntraces
        ...     df = headers.to_dataframe(selection=slice(0, 100))

    """

    def __init__(
        self,
        segy_file: Union[str, os.PathLike],
        bytes_filter: Union[List[int], None] = None,
        tracefield_filter: Union[List[str], None] = None,
        **segyio_kwargs: Any,
    ):
        """Open `segy_file` for reading and set up the header field filter.

        Args:
            segy_file: SEG-Y file path.
            bytes_filter: Byte locations of the header fields to keep.
            tracefield_filter: segyio trace field names of the header fields to keep.
            segyio_kwargs: Arguments passed to segyio.open. `ignore_geometry`
                is always forced to True.
        """
        # NOTE(review): these helpers are defined elsewhere; presumably they
        # validate the filters and raise on bad entries - confirm.
        check_tracefield(bytes_filter)
        check_tracefield_names(tracefield_filter)

        self.filter = self._combine_filters(bytes_filter, tracefield_filter)

        self.bytes_filter = bytes_filter

        self.segy_file = segy_file
        # Work on a copy so the caller's kwargs are never modified.
        _segyio_kwargs = segyio_kwargs.copy()
        _segyio_kwargs["ignore_geometry"] = True
        self.fh = segyio.open(self.segy_file, "r", **_segyio_kwargs)
        self.ntraces = self.fh.tracecount

    def _combine_filters(
        self,
        bytes_filter: Union[List[int], None],
        tracefield_filter: Union[List[str], None],
    ) -> List[segyio.tracefield.TraceField]:
        """Merge both filters into one de-duplicated list of trace fields.

        When neither filter selects anything, every field listed in
        `segyio.tracefield.keys` is returned.
        """
        filter_list = []
        if bytes_filter is not None:
            filter_list += [
                segyio.tracefield.TraceField(byte_loc) for byte_loc in bytes_filter
            ]

        if tracefield_filter is not None:
            filter_list += [
                segyio.tracefield.TraceField(segyio.tracefield.keys[key])
                for key in tracefield_filter
            ]

        if filter_list:
            # The same field may be requested by byte and by name.
            filter_list = list(set(filter_list))
        else:
            filter_list = [
                segyio.tracefield.TraceField(byte)
                for byte in segyio.tracefield.keys.values()
            ]

        return filter_list

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always release the segyio file handle opened in __init__.
        self.fh.close()

    def __iter__(self) -> Generator[Dict[str, Any], None, None]:
        """Iterate over the filtered headers of every trace."""
        return self[:]

    def __getitem__(
        self, i: Union[int, slice]
    ) -> Generator[Dict[str, Any], None, None]:
        """Yield the filtered header of each selected trace as a dict.

        Args:
            i: A trace number (negative values count from the end) or a slice
                of trace numbers.

        Raises:
            IndexError: If an integer trace number is out of range. Because
                this is a generator the error surfaces on first iteration.
        """
        if isinstance(i, int):
            trace = i + self.ntraces if i < 0 else i
            if not 0 <= trace < self.ntraces:
                raise IndexError(
                    f"trace index {i} out of range for {self.ntraces} traces"
                )
            # Integer indexing of the segyio header accessor returns a single
            # header rather than a sequence of headers, so use a length-one
            # slice to keep the loop below uniform.
            i = slice(trace, trace + 1)

        n = len(range(*i.indices(self.ntraces)))

        with Progress(unit=" traces", total=n, desc="Reading header") as pbar:
            for header in self.fh.header[i]:
                pbar.update(1)
                yield {key: header[key] for key in self.filter}

    def to_dataframe(self, selection: Union[int, slice, None] = None) -> pd.DataFrame:
        """Return the Trace Headers as a DataFrame

        Args:
            selection: A subset of trace headers will be returned based on trace
                numbering. An int selects a single trace (negative values count
                from the end), a slice selects a range and None selects all traces.

        Returns:
            A DataFrame indexed by trace number with one column per filtered
            header field.

        Raises:
            IndexError: If an integer selection is out of range.
        """
        if isinstance(selection, int):
            trace = selection + self.ntraces if selection < 0 else selection
            if not 0 <= trace < self.ntraces:
                raise IndexError(
                    f"trace index {selection} out of range for {self.ntraces} traces"
                )
            selection = slice(trace, trace + 1)
        elif not isinstance(selection, slice):
            selection = slice(None, None, None)

        index = pd.Index(range(*selection.indices(self.ntraces)))
        columns = tuple(str(f) for f in self.filter)

        head_df = pd.DataFrame(index=index, columns=columns)
        rows = [list(h.values()) for h in self[selection]]
        if rows:  # np.vstack raises on an empty list (empty selection)
            # This is slightly faster than building from dicts
            head_df.iloc[:, :] = np.vstack(rows)

        # convert numeric, using the smallest integer dtype that fits
        for col in head_df:
            head_df[col] = pd.to_numeric(head_df[col], downcast="integer")

        return head_df

`to_dataframe(selection=None)`

Return the Trace Headers as a DataFrame

Parameters:

Name	Type	Description	Default
`selection`	`Union[int, slice, None]`	A subset of trace headers will be returned based on trace numbering.	`None`

Source code in segysak/segy/_segy_headers.py

Python
def to_dataframe(self, selection: Union[int, slice, None] = None) -> pd.DataFrame:
    """Return the Trace Headers as a DataFrame

    Args:
        selection: A subset of trace headers will be returned based on trace
            numbering. An int selects a single trace (negative values count
            from the end), a slice selects a range and None selects all traces.

    Returns:
        A DataFrame indexed by trace number with one column per filtered
        header field.

    Raises:
        IndexError: If an integer selection is out of range.
    """
    if isinstance(selection, int):
        trace = selection + self.ntraces if selection < 0 else selection
        if not 0 <= trace < self.ntraces:
            raise IndexError(
                f"trace index {selection} out of range for {self.ntraces} traces"
            )
        # Normalise to a length-one slice so the index and the header
        # iteration below are built the same way for every selection kind.
        selection = slice(trace, trace + 1)
    elif not isinstance(selection, slice):
        selection = slice(None, None, None)

    index = pd.Index(range(*selection.indices(self.ntraces)))
    columns = tuple(str(f) for f in self.filter)

    head_df = pd.DataFrame(index=index, columns=columns)
    rows = [list(h.values()) for h in self[selection]]
    if rows:  # np.vstack raises on an empty list (empty selection)
        # This is slightly faster than building from dicts
        head_df.iloc[:, :] = np.vstack(rows)

    # convert numeric, using the smallest integer dtype that fits
    for col in head_df:
        head_df[col] = pd.to_numeric(head_df[col], downcast="integer")

    return head_df

`segy_header_scrape(segy_file, partial_scan=None, bytes_filter=None, chunk=100000, **segyio_kwargs)`

Scrape all data from segy trace headers

Parameters:

Name	Type	Description	Default
`segy_file`	`Union[str, PathLike]`	SEG-Y File path	required
`partial_scan`	`Union[int, None]`	Setting partial scan to a positive int will scan only that many traces. Defaults to None.	`None`
`bytes_filter`	`Union[List[int], None]`	List of byte locations to load exclusively.	`None`
`chunk`	`int`	Number of traces to read in one go.	`100000`
`segyio_kwargs`	`Any`	Arguments passed to segyio.open	`{}`

Returns:

Type	Description
`DataFrame`	pandas.DataFrame: Raw header information in table for scanned traces.

Source code in segysak/segy/_segy_headers.py

Python
def segy_header_scrape(
    segy_file: Union[str, os.PathLike],
    partial_scan: Union[int, None] = None,
    bytes_filter: Union[List[int], None] = None,
    chunk: int = 100_000,
    **segyio_kwargs: Any,
) -> pd.DataFrame:
    """Scrape all data from segy trace headers

    Headers are read in chunks of `chunk` traces and concatenated into a
    single DataFrame.

    Args:
        segy_file: SEG-Y File path
        partial_scan: Setting partial scan to a positive int will scan only
            that many traces (at most the number of traces in the file).
            Defaults to None.
        bytes_filter: List of byte locations to load exclusively.
        chunk: Number of traces to read in one go.
        segyio_kwargs: Arguments passed to segyio.open

    Returns:
        pandas.DataFrame: Raw header information in table for scanned traces.
    """
    with TraceHeaders(segy_file, bytes_filter=bytes_filter, **segyio_kwargs) as headers:
        if partial_scan is not None:
            # Never plan chunks past the end of the file: a chunk that lies
            # wholly beyond the last trace selects nothing and cannot be
            # turned into a DataFrame.
            ntraces = min(partial_scan, headers.ntraces)
        else:
            ntraces = headers.ntraces

        # Ceiling division: full chunks plus one more for any remainder.
        chunks = ntraces // chunk + min(ntraces % chunk, 1)
        _dfs = []
        with Progress(
            unit=" trace-chunks", total=chunks, desc="Processing Chunks"
        ) as pbar:
            for chk in range(chunks):
                chk_slc = slice(chk * chunk, min((chk + 1) * chunk, ntraces), None)
                _dfs.append(headers.to_dataframe(selection=chk_slc))
                pbar.update(1)

    head_df = pd.concat(_dfs)
    return head_df

`segy_bin_scrape(segy_file, **segyio_kwargs)`

Scrape binary header

Parameters:

Name	Type	Description	Default
`segy_file`	`Union[str, PathLike]`	SEG-Y file path	required
`segyio_kwargs`	`Any`	Arguments passed to segyio.open	`{}`

Returns:

Type	Description
`Dict`	Binary header key value pairs

Source code in segysak/segy/_segy_headers.py

Python
def segy_bin_scrape(segy_file: Union[str, os.PathLike], **segyio_kwargs: Any) -> Dict:
    """Read the binary header of a SEG-Y file into a dictionary.

    Args:
        segy_file: SEG-Y file path
        segyio_kwargs: Arguments passed to segyio.open; `ignore_geometry` is
            always set to True.

    Returns:
        Binary header key value pairs
    """
    binfields = _active_binfield_segyio()
    open_kwargs = {**segyio_kwargs, "ignore_geometry": True}

    scraped = {}
    with segyio.open(segy_file, "r", **open_kwargs) as segyf:
        # Look up each binary header field while the file is still open.
        for name, field in binfields.items():
            scraped[name] = segyf.bin[field]
    return scraped

`segy_header_scan(segy_file, max_traces_scan=1000, **segyio_kwargs)`

Perform a scan of the segy file headers and return ranges.

To get the complete raw header values see segy_header_scrape

Parameters:

Name	Type	Description	Default
`segy_file`	`Union[str, PathLike]`	SEG-Y file path	required
`max_traces_scan`	`int`	Number of traces to scan. To scan all traces set to <= 0. Defaults to 1000.	`1000`
`segyio_kwargs`	`Any`	Arguments passed to segyio.open	`{}`

Returns:

Type	Description
`DataFrame`	Uses pandas describe to return statistics of your headers.

Source code in segysak/segy/_segy_headers.py

Python
def segy_header_scan(
    segy_file: Union[str, os.PathLike],
    max_traces_scan: int = 1000,
    **segyio_kwargs: Any,
) -> pd.DataFrame:
    """Scan the trace headers of a segy file and summarise their value ranges.

    For the complete raw header values use `segy_header_scrape` instead.

    Args:
        segy_file: SEG-Y file path
        max_traces_scan: Number of traces to scan. To scan all traces set to <= 0. Defaults to 1000.
        segyio_kwargs: Arguments passed to segyio.open

    Returns:
        The transposed pandas describe statistics of the scanned headers, with
        a leading `byte_loc` column. The number of scanned traces is attached
        as the `nscan` attribute.

    Raises:
        ValueError: If `max_traces_scan` is positive but not an int.
    """
    if max_traces_scan <= 0:
        # Non-positive means "scan everything".
        max_traces_scan = None
    elif not isinstance(max_traces_scan, int):
        raise ValueError("max_traces_scan must be int")

    scraped = segy_header_scrape(segy_file, max_traces_scan, **segyio_kwargs)

    stats = scraped.describe().T
    stat_columns = list(stats.columns)
    # Map every header name back to its byte location and put it first.
    stats["byte_loc"] = [segyio.tracefield.keys[name] for name in stats.index]
    stats = stats[["byte_loc"] + stat_columns]
    stats.nscan = scraped.shape[0]
    return stats

`get_segy_texthead(segy_file, ext_headers=False, no_richstr=False, **segyio_kwargs)`

Return the EBCDIC header as a Python string. New lines are separated by the \n char.

Parameters:

Name	Type	Description	Default
`segy_file`	`Union[str, PathLike]`	Segy File Path	required
`ext_headers`	`bool`	Return EBCDIC and extended headers in list. Defaults to False	`False`
`no_richstr`	`bool`	Defaults to False. If true the returned string will not be updated for pretty HTML printing.	`False`
`segyio_kwargs`	`Dict[str, Any]`	Key word arguments to pass to segyio.open	`{}`

Returns:

Name	Type	Description
`text`	`str`	Returns the EBCDIC text as a formatted paragraph.

Source code in segysak/segy/_segy_text.py

Python
def get_segy_texthead(
    segy_file: Union[str, os.PathLike],
    ext_headers: bool = False,
    no_richstr: bool = False,
    **segyio_kwargs: Any,
) -> str:
    """Return the EBCDIC header as a Python string. New lines are separated by the `\\n` char.

    Without `ext_headers` the first 3200 bytes of the file are read directly
    and decoded as ASCII or EBCDIC (cp500), depending on what `_isascii`
    reports. With `ext_headers` the text is read through segyio instead.

    Args:
        segy_file: Segy File Path
        ext_headers: Return EBCDIC and extended headers in list. Defaults to False
        no_richstr: Defaults to False. If true the returned string
            will not be updated for pretty HTML printing.
        segyio_kwargs: Key word arguments to pass to segyio.open

    Returns:
        text: Returns the EBCDIC text as a formatted paragraph.

    Raises:
        UnicodeDecodeError: If the text header cannot be decoded through segyio.
    """

    with open(segy_file, mode="rb") as f:
        f.seek(0, 0)  # Locate our position to first byte of file
        data = f.read(3200)  # Read the first 3200 byte from our position

    if _isascii(data) and ext_headers == False:
        encoding = "ascii"
    elif ext_headers == False:
        encoding = "cp500"  # text is EBCDIC
    else:
        encoding = "ebcidc"  # sentinel only: routes to the segyio branch below

    if encoding in ["ascii", "cp500"]:
        # Split on fixed 80 byte records so the bytes land on the 40 header
        # lines, and decode with the encoding detected above; decoding ASCII
        # bytes as cp500 would yield garbage.
        # NOTE(review): "replace" guards against stray non-ASCII bytes in case
        # `_isascii` is lenient - confirm against its implementation.
        lines = [
            data[i : i + 80].decode(encoding, "replace") for i in range(0, 3200, 80)
        ]
        text = "\n".join(lines)
    else:
        segyio_kwargs["ignore_geometry"] = True
        try:
            with segyio.open(segy_file, "r", **segyio_kwargs) as segyf:
                text = segyf.text[0].decode("ascii", "replace")
                text = _text_fixes(text)
                text = segyio.tools.wrap(text)
                if segyf.ext_headers and ext_headers:
                    text2 = segyf.text[1].decode("ascii", "replace")
                    # NOTE(review): `text` becomes a list here and is still
                    # passed to `_text_fixes` below - confirm that helper
                    # accepts a list.
                    text = [text, text2]
        except UnicodeDecodeError as err:
            print(err)
            print("The segy text header could not be decoded.")
            # There is no text to return; re-raise rather than falling through
            # to an unbound `text` below.
            raise

    text = _text_fixes(text)

    if no_richstr:
        return text
    else:
        return _upgrade_txt_richstr(text)

`header_as_dimensions(head_df, dims)`

Convert dim_kwargs to a dictionary of dimensions. Also useful for checking geometry is correct and unique for each trace in a segy file header.

Parameters:

Name	Type	Description	Default
`head_df`	`DataFrame`	The header DataFrame from `segy_header_scrape`.	required
`dims`	`tuple`	Dimension names as per head_df column names.	required

Returns:

Name	Type	Description
`dims`	`Dict[str, array]`	Dimension name and label pairs.

Source code in segysak/segy/_segy_headers.py

Python
def header_as_dimensions(head_df: pd.DataFrame, dims: tuple) -> Dict[str, np.array]:
    """Build the sorted unique labels for each requested dimension.

    Also checks that the chosen dimensions identify every trace uniquely,
    which makes this useful for validating the geometry described by a segy
    file's headers.

    Args:
        head_df: The header DataFrame from `segy_header_scrape`.
        dims: Dimension names as per head_df column names.

    Returns:
        dims: Dimension name and label pairs.

    Raises:
        ValueError: If two or more traces share the same combination of
            dimension values.
    """
    # Ascending unique values per dimension column.
    dim_labels = {name: np.sort(head_df[name].unique()) for name in dims}

    dim_columns = head_df[list(dims)]
    # Dropping duplicate rows must not change the shape if every trace sits
    # at its own location.
    if dim_columns.shape != dim_columns.drop_duplicates().shape:
        raise ValueError(
            "The selected dimensions results in multiple traces per "
            "dimension location, add additional dimensions or use "
            "trace numbering byte location to load as 2D."
        )

    return dim_labels