| 123456789101112131415161718192021222324252627282930313233343536373839 |
- #! /usr/bin/env python3
- import PyPDF2
- import pandas as pd
class Senioritylist:
    """Text extracted from a single page of a seniority-list PDF.

    Constructing an instance immediately parses page 0 of ``pdf_filename``.

    Attributes:
        pdf_filename: Path handed to ``open()`` when the PDF is loaded.
        raw: Full extracted text of the most recently parsed page.
        lines: ``raw`` split on newline characters.
        num_lines: Number of entries in ``lines``.
        effective_date: Characters 16-22 of the page's first line. Only
            assigned when that line contains "Effective Date"; otherwise
            the previous value is kept.
        base: Characters 84-87 of the page's first line, assigned under
            the same condition as ``effective_date``.
    """

    # Text that identifies the header line, and the fixed column ranges read
    # from it.
    # NOTE(review): the offsets presumably match the header layout of the
    # source PDF -- confirm against a real document before changing them.
    _HEADER_MARKER = "Effective Date"
    _EFFECTIVE_DATE_COLS = slice(16, 23)
    _BASE_COLS = slice(84, 88)

    def __init__(self, pdf_filename):
        """Store the PDF path and parse its first page."""
        self.lines = None
        self.num_lines = None
        self.raw = None
        self.effective_date = None
        self.base = None
        self.pdf_filename = pdf_filename
        self.parse_page(0)

    def import_pdf(self):
        """Read the PDF into memory and return a ``PyPDF2.PdfFileReader``.

        The file is read completely and closed before the reader is built,
        so no file handle stays open for the lifetime of the reader.

        Raises:
            OSError: If ``pdf_filename`` cannot be opened or read.
        """
        import io  # local import: only needed here

        with open(self.pdf_filename, 'rb') as pdf_file:
            pdf_bytes = pdf_file.read()
        # NOTE(review): PdfFileReader/extractText are the PyPDF2 1.x/2.x
        # names; PyPDF2 3.x renamed them to PdfReader/extract_text.
        return PyPDF2.PdfFileReader(io.BytesIO(pdf_bytes))

    def parse_page(self, pagenum=0):
        """Extract the text of page ``pagenum`` and update the instance.

        Sets ``raw``, ``lines`` and ``num_lines`` unconditionally. Sets
        ``effective_date`` and ``base`` only when the first line of the
        page contains "Effective Date".

        Args:
            pagenum: Zero-based index into the reader's ``pages``.
        """
        reader = self.import_pdf()
        self.raw = reader.pages[pagenum].extractText()
        self.lines = self.raw.split("\n")
        self.num_lines = len(self.lines)

        # str.split always yields at least one element, so [0] is safe.
        header = self.lines[0]
        if self._HEADER_MARKER in header:
            self.effective_date = header[self._EFFECTIVE_DATE_COLS]
            self.base = header[self._BASE_COLS]
if __name__ == '__main__':
    # Manual smoke run: parse the first page of the sample PDF, then show
    # its first ten lines followed by the two header fields.
    seniority = Senioritylist('Dec_22/dec22_sen.pdf')
    for shown in (seniority.lines[0:10], seniority.effective_date, seniority.base):
        print(shown)
|