0
1

test2.py 1.0 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. #! /usr/bin/env python3
  2. import PyPDF2
  3. import pandas as pd
  4. class Senioritylist:
  5. def __init__(self, pdf_filename):
  6. self.lines = None
  7. self.num_lines = None
  8. self.raw = None
  9. self.effective_date = None
  10. self.base = None
  11. self.pdf_filename = pdf_filename
  12. self.parse_page(0)
  13. def import_pdf(self):
  14. obj = open(self.pdf_filename, 'rb')
  15. pdfreader = PyPDF2.PdfFileReader(obj)
  16. return pdfreader
  17. def parse_page(self, pagenum=0):
  18. pdfreader = self.import_pdf()
  19. self.raw = pdfreader.pages[pagenum].extractText()
  20. self.lines = self.raw.split("\n")
  21. self.num_lines = len(self.lines)
  22. base = None
  23. seat = None
  24. if self.lines[0].find("Effective Date") != -1:
  25. self.effective_date = self.lines[0][16:23]
  26. self.base = self.lines[0][84:88]
  27. if __name__ == '__main__':
  28. mylist = Senioritylist('Dec_22/dec22_sen.pdf')
  29. print(mylist.lines[0:10])
  30. print(mylist.effective_date)
  31. print(mylist.base)