# Source code for hkjournalist.journalist

import pandas as pd
import os
import matplotlib
import inspect
import matplotlib.pyplot as plt
import subprocess
import datetime
import collections
from tabulate import tabulate
from pathlib import Path
from shutil import rmtree
import pkg_resources

# Resolve the on-disk location of the LaTeX snippet that is handed to pandoc
# through its ``-H`` (include-in-header) option when a report is rendered.
resource_package = __name__
# Resource names are always '/'-separated, whatever the host OS uses.
code_config_path = '/'.join(['configuration', 'code_block.tex'])
tex_config_path = pkg_resources.resource_filename(resource_package,
                                                  code_config_path)

# Namespace that provides the container ABCs (``Iterable``, ``Sized``, ...).
# The submodule is imported explicitly: reading ``collections.abc`` as a plain
# attribute only works when something else has already imported it, and the
# bare ``collections`` module no longer exposes the ABCs on Python >= 3.10.
try:
    from collections import abc as collectionsAbc
except ImportError:
    # Legacy interpreters without a ``collections.abc`` submodule kept the
    # ABCs directly on ``collections``.
    collectionsAbc = collections


class Journalist():
    """
    Class to record and generate reports

    Typical workflow: pass variables with :meth:`hear`, create a markdown
    template with :meth:`generate_template` (or supply your own template
    file), then render the final document with :meth:`report`.
    """

    def __init__(self, template_file=None, fig_width=None, fig_height=None, tmp_path='./temp', zh=False):
        """
        :param template_file: file path of md template
        :type template_file: str
        :param fig_width: figure width (percentage of whole page width), prior to `fig_height` settings
        :type fig_width: None, int
        :param fig_height: figure height (percentage); only used when `fig_width` is not given
        :type fig_height: None, int
        :param tmp_path: temporary directory path to store temporary files (such as figures)
        :type tmp_path: str
        :param zh: if it supports chinese (zh_CN) usage
        :type zh: bool
        """
        self.template_file = template_file
        self._width = fig_width
        self._height = fig_height
        # variable name -> kind tag ('table', 'figure', 'function', ...)
        self.var_type = {}
        # running index used to give every saved figure a unique file name
        self.fig_counters = 0
        self.tmp_path = tmp_path
        # variable name -> value substituted into the template by `report`
        self.report_config = {}
        self.zh = zh
        self._ensure_tmp_dir()

        # Braces are doubled because the template later goes through
        # `str.format`, which collapses '{{' / '}}' into literal braces.
        if fig_width:
            self.fig_config = '{{ ' + f'width={fig_width}%' + ' }}'
        elif fig_height:
            self.fig_config = '{{ ' + f'height={fig_height}%' + ' }}'
        else:
            self.fig_config = ''

    def _ensure_tmp_dir(self):
        """Create the temporary directory (including parents) if it is missing."""
        # `report` removes the directory when it finishes, so it has to be
        # re-created on demand for any later `hear` / `report` call.
        if not os.path.exists(self.tmp_path):
            os.makedirs(self.tmp_path, exist_ok=True)

    def _save_figure(self, axes):
        """Save the figure owning `axes` as a pdf in the temp directory and return its path."""
        self._ensure_tmp_dir()
        fig_file = os.path.join(
            self.tmp_path, f'figure_{self.fig_counters}.pdf')
        self.fig_counters += 1
        axes.get_figure().savefig(fig_file)
        return fig_file

    @staticmethod
    def _is_axes_collection(content):
        """Tell whether `content` is a non-empty, indexable collection of matplotlib axes."""
        # Strings, series and mappings are iterable but never hold axes in
        # the sense meant here; they have dedicated (or default) handling.
        if isinstance(content, (str, bytes, pd.Series, collectionsAbc.Mapping)):
            return False
        # Require a sized, indexable container: one-shot iterators would be
        # partially consumed by the check and cannot be indexed with [-1].
        if not isinstance(content, collectionsAbc.Sized) or not hasattr(content, '__getitem__'):
            return False
        try:
            if len(content) == 0:
                # all() over nothing is True, which would lead to content[-1]
                # failing on an empty container
                return False
        except TypeError:
            # objects such as 0-d arrays define __len__ but cannot report a length
            return False
        return all(isinstance(item, matplotlib.axes.Axes) for item in content)

    def __preprocess(self, config_dict: dict):
        """
        Convert every value into something that can be substituted into the
        markdown template and remember its kind in ``self.var_type``.

        :param config_dict: variable mappings such as {'var_name':value}
        :type config_dict: dict
        :return: config dict after pre-processing
        :rtype: dict
        """
        applied_config_dict = config_dict.copy()
        for k, report_content in config_dict.items():
            if isinstance(report_content, pd.DataFrame):
                # transform into a string with markdown table format
                applied_config_dict[k] = tabulate(
                    report_content.round(2), tablefmt='github', headers='keys')
                self.var_type[k] = 'table'
            elif isinstance(report_content, matplotlib.axes.Axes):
                # `Axes` covers everything the former `SubplotBase` check
                # matched and still exists in current matplotlib releases.
                # save plot generated by matplotlib to a pdf format in temp directory
                applied_config_dict[k] = self._save_figure(report_content)
                self.var_type[k] = 'figure'
            elif callable(report_content):
                # print function definition on final report
                applied_config_dict[k] = inspect.getsource(report_content)
                self.var_type[k] = 'function'
            elif isinstance(report_content, list) and all(isinstance(s, str) for s in report_content):
                # item count followed by all words concatenated into a sentence
                applied_config_dict[k] = str(
                    len(report_content)) + ' ' + ', '.join(report_content)
                self.var_type[k] = 'list(str)'
            elif self._is_axes_collection(report_content):
                # several axes: only the figure of the last one is saved
                applied_config_dict[k] = self._save_figure(report_content[-1])
                self.var_type[k] = 'figure'
            elif isinstance(report_content, pd.Series):
                # blank line between rows so markdown keeps one row per line
                applied_config_dict[k] = str(
                    report_content).replace('\n', '\n\n')
                self.var_type[k] = 'series'
            else:
                # otherwise: leave it as origin format (use its own str method)
                applied_config_dict[k] = report_content
                self.var_type[k] = 'other'

        return applied_config_dict

    def hear(self, config_dict: dict):
        """
        Pass your variables mappings to the reporter

        :param config_dict: variable mappings such as {'var_name':value}
        :type config_dict: dict
        :return: None
        :rtype: None
        """
        newest_config = self.__preprocess(config_dict)
        self.report_config.update(newest_config)

    def generate_template(self, template_file='./template.md', title='template', author='Author', append=False):
        """Generate a `md` template according to mappings which previously passed to.

        The output template will be structured as each variable on a single
        slide with variable name as its title.

        **Note**: it may overwrite the file with the address

        :param template_file: output template file path
        :type template_file: str
        :param title: report title
        :type title: str
        :param author: author name
        :type author: str
        :param append: if True, keep the current content of `template_file` and add the new slides after it
        :type append: bool
        :return: None
        :rtype: None
        """
        if self.template_file and not append:
            print('warning: template file was specified before and will be overwritten')
        self.template_file = template_file

        if append:
            # utf8 on both read and write so the file matches what `report` expects
            report_text = Path(self.template_file).read_text(encoding='utf8') + '\n'
        else:
            # pandoc metadata header
            report_text = '---\n'
            if self.zh:
                report_text += 'documentclass: ctexbeamer\n'
            report_text += f'title: {title}\n'
            report_text += f'author: {author}\n'
            # doubled braces survive the later `str.format` as '{}'
            report_text += 'date: \\today{{}}\n'
            report_text += '---\n'

        for k, v in self.var_type.items():
            # placeholder that `report` fills in through `str.format`
            k_name = '{' + k + '}'
            heading = f"### {k}\n\n"
            if v == 'figure':
                content = f'![]({k_name}){self.fig_config}\n\n'
            elif v == 'function':
                heading = f"### {k}" + '{{.fragile}}\n\n'
                content = '```{{.python}}\n' + k_name + '\n```\n\n'
            else:
                # In case the content is too long, allow multiple frames to display
                if v in ('list(str)', 'table', 'series'):
                    heading = f"### {k}" + '{{.allowframebreaks}}\n\n'
                content = k_name + '\n\n'
            report_text = report_text + heading + content

        Path(template_file).write_text(report_text, encoding='utf8')
        print(f'New template file is generated in {template_file}')

    def report(self, output_file='./final_report.pdf', beamer=True, theme='default', color_theme='seagull',
               use_template_config=False, overwrite=True, aspectratio=43,):
        """
        Generate final pdf (or other format) report using previously heard config dict

        :param output_file: final output file path (spaces are replaced by ``-``)
        :type output_file: str
        :param beamer: whether the output pdf will be a beamer slides ?
        :type beamer: bool
        :param theme: the theme used to create beamer (see https://hartwork.org/beamer-theme-matrix/)
        :type theme: str
        :param color_theme: the color theme used to create beamer (see https://hartwork.org/beamer-theme-matrix/)
        :type color_theme: str
        :param use_template_config: whether use metadata params of format in your custom template, if false, just use
            params in this function to produce a report. otherwise, please ref to https://pandoc.org/MANUAL.html to
            write a fine ``md`` template
        :type use_template_config: bool
        :param overwrite: if true, write exactly to `output_file`. if false, a timestamp of current time is added as a
            postfix of the output filename, so every call produces a new file without overwriting previous ones.
        :type overwrite: bool
        :param aspectratio: aspect ratio of slide page. only valid when `beamer` is turn on and output format is `pdf`
        :type aspectratio: int
        :return: execution return code (0 if succeed)
        :rtype: int
        :raises ValueError: if no template file is known yet
        """
        if not self.template_file:
            raise ValueError(
                'no template file specified: pass `template_file` to the constructor '
                'or call `generate_template` first')

        # the directory is removed at the end of every call, so make sure it
        # exists again before the intermediate markdown file is written
        self._ensure_tmp_dir()
        raw_file = os.path.join(self.tmp_path, 'raw_report.md')
        output_file = output_file.replace(' ', '-')
        report_name, ext = os.path.splitext(output_file)
        args_list = ['pandoc', '-s', raw_file,
                     '--listings', '-H', tex_config_path]

        if overwrite:
            final_file = output_file
        else:
            # NOTE(review): ':' is not allowed in file names on Windows;
            # format kept unchanged for compatibility with existing outputs.
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
            final_file = f"{report_name}_{timestamp}{ext}"

        # fill the placeholders of the template with the heard variables
        report_template_text = Path(self.template_file).read_text(encoding='utf8')
        Path(raw_file).write_text(report_template_text.format(
            **self.report_config), encoding='utf8')

        if beamer and ext == '.pdf':
            args_list += ['-t', 'beamer']
        else:
            args_list += ['-t', 'latex']
        if beamer and not use_template_config:
            args_list += ['-V', 'theme:' + theme, '-V', 'colortheme:' +
                          color_theme, '-V', 'aspectratio:' + str(aspectratio)]
        if self.zh:
            args_list.append('--pdf-engine=xelatex')
        args_list += ['-o', final_file]

        # an argument list with shell=False: shell=True sometimes leads to invalid output
        proc = subprocess.run(args_list, shell=False, capture_output=True)
        if proc.returncode == 0:
            print(f'Make a big news! The newest report is now in {final_file}')
        else:
            # for debug; errors='replace' so undecodable bytes cannot hide the real failure
            print(f'Report failed with code {proc.returncode} \n',
                  f'stderr: {proc.stderr.decode("utf8", errors="replace")} \n ',
                  f'stdout: {proc.stdout.decode("utf8", errors="replace")}')

        # temporary figures and the raw markdown are discarded after every run
        if os.path.exists(self.tmp_path):
            rmtree(self.tmp_path)
        return proc.returncode