get_datetime_info.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. import pandas as pd
  2. def get_datetime_info(df, date_col, timezone=None, drop=False):
  3. """
  4. Extract date and time information from a column in dataframe
  5. and add as new columns. Time zones are converted to local time if specified.
  6. :param df: pandas dataframe
  7. :param date_col: string representing the column containing datetimes. Can also be 'index' to use the index
  8. :param timezone: string for the time zone. If passed, times are converted to local
  9. :param drop: boolean indicating whether the original column should be dropped from the df
  10. :return df: dataframe with added date and time columns
  11. """
  12. df = df.copy()
  13. # Extract the field
  14. if date_col == 'index':
  15. fld = df.index.to_series()
  16. prefix = df.index.name if df.index.name is not None else 'datetime'
  17. else:
  18. fld = df[date_col]
  19. prefix = date_col
  20. # Make sure the field type is a datetime
  21. if timezone is not None:
  22. fld = pd.to_datetime(fld, utc=True)
  23. else:
  24. fld = pd.to_datetime(fld)
  25. # Convert to local time and then remove time zone information
  26. if timezone:
  27. df['utc'] = fld.dt.tz_convert('UTC').dt.tz_localize(None)
  28. fld = fld.dt.tz_convert(timezone).dt.tz_localize(None)
  29. df['local'] = fld
  30. # Used for naming the columns
  31. prefix += '_'
  32. # Basic attributes
  33. attr = ['Year', 'Month', 'Week', 'Day', 'Dayofweek', 'Dayofyear']
  34. # Additional attributes to extract
  35. attr = attr + [
  36. 'Is_month_end', 'Is_month_start', 'Is_quarter_end', 'Is_quarter_start',
  37. 'Is_year_end', 'Is_year_start'
  38. ]
  39. # Time attributes
  40. attr = attr + ['Hour', 'Minute', 'Second']
  41. # Iterate through each attribute and add it to the dataframe
  42. for n in attr:
  43. df[prefix + n] = getattr(fld.dt, n.lower())
  44. # Add fractional time of day
  45. df[prefix + 'FracDay'] = (df[prefix + 'Hour'] / 24) + (
  46. df[prefix + 'Minute'] / 60 / 24) + (
  47. df[prefix + 'Second'] / 60 / 60 / 24)
  48. # Add fractional time of week
  49. df[prefix + 'FracWeek'] = ((df[prefix + 'Dayofweek'] * 24) +
  50. (df[prefix + 'FracDay'] * 24)) / (7 * 24)
  51. # Drop the column if specified
  52. if drop:
  53. if date_col == 'index':
  54. df = df.reset_index().iloc[:, 1:].copy()
  55. else:
  56. df = df.drop(date_col, axis=1)
  57. return df