Source code for src.main.data_models.stock_price_data

from enum import Enum

from typing import Optional, Literal
from pydantic import BaseModel
from openbb import obb

import pandas as pd


class ProviderEnum(Enum):
    """
    Enumeration of data providers.

    Each member's value is the lowercase provider identifier string.
    """

    # Members are intentionally left unannotated: a member is an instance of
    # ``ProviderEnum`` (not ``str``), so a ``: str`` annotation is incorrect
    # and is reported as an error by type checkers.
    FMP = 'fmp'
    INTRINIO = 'intrinio'
    POLYGON = 'polygon'
    TIINGO = 'tiingo'
    YFINANCE = 'yfinance'
# Define a Pydantic model for the stock price data
class StockPriceData(BaseModel):
    """
    Pydantic model for representing stock price data.

    One instance describes a single observation (one row of the frame
    produced by ``clean_stock_price``).

    Attributes
    ----------
    date : str
        Timestamp of the observation as a string (``clean_stock_price``
        emits it in ``'%Y-%m-%d %H:%M:%S%z'`` format).
    closing_price : float
        Closing price for the observation.
    returns : Optional[float]
        Percentage change of the close versus the previous observation.
    holding_period_yield : Optional[float]
        Close divided by the previous close, minus one.
    holding_period_return : Optional[float]
        Close divided by the previous close.
    portfolio_of_1000 : Optional[float]
        1000 multiplied by the cumulative product of the holding period
        returns.
    """

    date: str
    closing_price: float
    returns: Optional[float]
    holding_period_yield: Optional[float]
    holding_period_return: Optional[float]
    portfolio_of_1000: Optional[float]

    def validation(self):
        """
        TODO: Validate the data types and formats.

        Not implemented yet: the method currently does nothing and
        returns ``None``.

        Raises
        ------
        ValueError
            Intended to be raised if any validation fails, once implemented.
        """
        pass

    def convert(self):
        """
        TODO: Convert the data types and formats as needed.

        This method is meant to convert the data types and formats of the
        StockPriceData object, for example turning the date string into a
        datetime object. Not implemented yet: it currently does nothing and
        returns ``None``.
        """
        pass

    def to_dict(self):
        """
        Return a dictionary representation of the object.

        Returns
        -------
        dict
            A new dictionary built from the instance attributes.
        """
        # ``vars(self)`` is the instance's live ``__dict__``; hand back a
        # shallow copy so that callers editing the result cannot silently
        # change the model's state behind its back.
        return dict(vars(self))
# Define a Pydantic model for the stock data
class StockData(BaseModel):
    """
    Pydantic model pairing a ticker with its price history.

    Attributes
    ----------
    ticker : str
        The stock ticker symbol.
    stock_price_data : list[StockPriceData]
        The stock price records associated with the ticker.

    Notes
    -----
    1. Rationale
        The model gives the data returned for one symbol a fixed structure:
        the symbol itself plus the list of its ``StockPriceData`` entries.
    """

    ticker: str
    stock_price_data: list[StockPriceData]
def get_stock_data(symbol: str,
                   provider: Literal[
                       ProviderEnum.FMP,
                       ProviderEnum.INTRINIO,
                       ProviderEnum.POLYGON,
                       ProviderEnum.TIINGO,
                       ProviderEnum.YFINANCE],
                   start_date: str,
                   interval: str) -> StockData:
    """
    Retrieves and processes stock data for a given symbol, provider, start date, and interval.

    Parameters
    ----------
    symbol: str
        The stock ticker symbol.
    provider: Literal[ProviderEnum.FMP, ProviderEnum.INTRINIO, ProviderEnum.POLYGON, ProviderEnum.TIINGO, ProviderEnum.YFINANCE]
        The data provider. A ``ProviderEnum`` member is unwrapped to its
        string value before the request is made; a plain provider string
        is forwarded unchanged.
    start_date: str
        The start date for the data retrieval in 'YYYY-MM-DD' format.
    interval: str
        The interval for the stock data (e.g., '1d' = One day, '1W' = One week, '1M' = One month).

    Returns
    -------
    StockData
        A Pydantic model instance containing the stock ticker symbol and a list of cleaned stock price data.

    Notes
    -----
    1. Implementation Details
        - Retrieves historical stock price data from the specified provider for given symbol, start date, and interval.
        - The raw stock price data is cleaned and additional metrics are calculated by ``clean_stock_price``.
        - Every row of the cleaned frame is turned into a ``StockPriceData`` and the rows are wrapped in a ``StockData``.
    """
    # The signature advertises ProviderEnum members, but the OpenBB call is
    # documented as taking the provider's identifier string (e.g. 'yfinance').
    # Unwrap Enum members so both spellings work.
    provider_name = provider.value if isinstance(provider, Enum) else provider

    stock_price: pd.DataFrame = obb.equity.price.historical(
        symbol=symbol,
        provider=provider_name,
        start_date=start_date,
        interval=interval,
    ).to_df()
    stock_price_clean: pd.DataFrame = clean_stock_price(stock_price)

    # Column labels are coerced to ``str`` so they can be used as keyword
    # arguments when building each record.
    stock_price_data_dict: list[StockPriceData] = [
        StockPriceData(**{str(k): v for k, v in data.items()})
        for data in stock_price_clean.to_dict("records")
    ]
    return StockData(ticker=symbol, stock_price_data=stock_price_data_dict)
def clean_stock_price(stock_price: pd.DataFrame) -> pd.DataFrame:
    """
    Clean the raw stock price data and enhance it with additional calculated metrics.

    Parameters
    ----------
    stock_price: pd.DataFrame
        The raw stock price data. It must contain a ``"close"`` column and
        an index that ``pd.to_datetime`` can parse.

    Returns
    -------
    pd.DataFrame
        A new frame with the columns ``close``, ``closing_price``, ``date``,
        ``returns``, ``holding_period_yield``, ``holding_period_return`` and
        ``portfolio_of_1000``, indexed by the formatted timestamp strings.

    Notes
    -----
    1. Rationale
        Only the closing price is kept from the raw data. From it the
        function derives the period-over-period return, the holding period
        yield (close / previous close - 1), the holding period return
        (close / previous close) and the value of a hypothetical $1000
        initial investment (1000 * cumulative product of the holding period
        return). The first row has no previous close, so its derived
        metrics are NaN.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S%z"

    frame: pd.DataFrame = stock_price.loc[:, ["close"]]
    close = frame["close"]

    # Shared intermediates: the formatted index labels and the ratio of each
    # close to the one before it.
    labels = pd.to_datetime(frame.index).strftime(timestamp_format)
    growth_factor = close / close.shift(1)

    frame["closing_price"] = close
    frame["date"] = labels
    frame["returns"] = close.pct_change()
    frame["holding_period_yield"] = growth_factor - 1
    frame["holding_period_return"] = growth_factor
    frame["portfolio_of_1000"] = 1000 * growth_factor.cumprod()

    # Re-label the rows last, after every index-aligned column is in place.
    frame.index = labels
    return frame