Python自动化大师第5篇:Excel与CSV文件自动化
Python Automation Master Part 5: Excel and CSV File Automation
前言:电子表格自动化的重要性
Excel和CSV文件是商业环境中使用最广泛的数据格式。每天都有大量数据以电子表格形式被创建、处理和报告。使用Python自动化这些重复性工作可以节省时间并减少人为错误。
在第5篇中,我们将学习如何使用Python的csv模块、pandas和openpyxl库高效处理CSV和Excel文件。从基本的文件读写到单元格样式设置、图表创建、大文件处理,我们将介绍可以立即在实际工作中应用的技术。
1. CSV文件处理基础(csv模块)
Python内置的csv模块可以在不安装任何额外包的情况下处理CSV文件。
1.1 读取CSV文件
import csv
# 基本CSV读取
with open('data.csv', 'r', encoding='utf-8') as file:
reader = csv.reader(file)
for row in reader:
print(row) # 每行以列表形式返回
# 以字典形式读取(基于表头)
with open('data.csv', 'r', encoding='utf-8') as file:
reader = csv.DictReader(file)
for row in reader:
print(row['姓名'], row['年龄']) # 通过列名访问
# 指定分隔符
with open('data.tsv', 'r', encoding='utf-8') as file:
reader = csv.reader(file, delimiter='\t') # 制表符分隔
for row in reader:
print(row)
1.2 写入CSV文件
import csv
# 基本CSV写入
data = [
['姓名', '年龄', '城市'],
['张三', 25, '北京'],
['李四', 30, '上海'],
['王五', 28, '广州']
]
with open('output.csv', 'w', encoding='utf-8-sig', newline='') as file:
writer = csv.writer(file)
writer.writerows(data) # 一次写入多行
# 以字典形式写入
data_dict = [
{'姓名': '张三', '年龄': 25, '城市': '北京'},
{'姓名': '李四', '年龄': 30, '城市': '上海'},
{'姓名': '王五', '年龄': 28, '城市': '广州'}
]
with open('output_dict.csv', 'w', encoding='utf-8-sig', newline='') as file:
fieldnames = ['姓名', '年龄', '城市']
writer = csv.DictWriter(file, fieldnames=fieldnames)
writer.writeheader() # 写入表头
writer.writerows(data_dict)
提示:在Windows上用Excel打开CSV文件时,为防止中文乱码,请使用encoding='utf-8-sig'。添加BOM(字节顺序标记)后,Excel可以正确识别编码。
2. 使用pandas处理CSV
pandas是一个专为数据分析优化的库,为CSV文件处理提供了强大的功能。
2.1 pandas安装与基本使用
# 安装
pip install pandas
import pandas as pd
# 读取CSV
df = pd.read_csv('data.csv', encoding='utf-8')
# 查看基本信息
print(df.head()) # 前5行
print(df.info()) # 数据类型和缺失值信息
print(df.describe()) # 统计摘要
# 选择特定列
print(df['姓名']) # 单列
print(df[['姓名', '年龄']]) # 多列
# 条件筛选
adults = df[df['年龄'] >= 20]
beijing_people = df[df['城市'] == '北京']
2.2 数据预处理与加工
import pandas as pd
df = pd.read_csv('sales_data.csv')
# 处理缺失值
df.fillna(0, inplace=True) # 用0填充
df.dropna(subset=['必填列'], inplace=True) # 删除特定列有缺失值的行
# 添加新列
df['总金额'] = df['单价'] * df['数量']
df['折扣价'] = df['总金额'] * 0.9
# 重命名列
df.rename(columns={'old_name': 'new_name'}, inplace=True)
# 数据排序
df.sort_values(by='总金额', ascending=False, inplace=True)
# 按组聚合
summary = df.groupby('类别').agg({
'总金额': 'sum',
'数量': 'mean',
'订单号': 'count'
}).rename(columns={'订单号': '订单数'})
# 保存为CSV
df.to_csv('processed_data.csv', index=False, encoding='utf-8-sig')
2.3 合并多个CSV文件
import pandas as pd
import glob
# 合并相同格式的多个CSV文件
csv_files = glob.glob('data/*.csv')
df_list = [pd.read_csv(f) for f in csv_files]
combined_df = pd.concat(df_list, ignore_index=True)
# 合并两个数据框(JOIN)
df1 = pd.read_csv('customers.csv')
df2 = pd.read_csv('orders.csv')
# 内连接(只保留两边都有的)
merged = pd.merge(df1, df2, on='客户ID', how='inner')
# 外连接(保留所有数据)
merged_all = pd.merge(df1, df2, on='客户ID', how='outer')
3. 使用openpyxl创建/修改Excel文件
openpyxl是一个专门用于创建和修改Excel文件(.xlsx)的库。
3.1 安装与基本使用
# 安装
pip install openpyxl
from openpyxl import Workbook, load_workbook
# 创建新工作簿
wb = Workbook()
ws = wb.active
ws.title = "销售数据"
# 输入数据
ws['A1'] = '产品名称'
ws['B1'] = '数量'
ws['C1'] = '单价'
# 添加多行
data = [
['笔记本电脑', 10, 1200000],
['鼠标', 50, 25000],
['键盘', 30, 45000]
]
for row in data:
ws.append(row)
# 保存
wb.save('sales.xlsx')
# 打开现有文件
wb = load_workbook('sales.xlsx')
ws = wb.active
# 读取单元格值
print(ws['A1'].value)
print(ws.cell(row=2, column=1).value)
# 读取范围
for row in ws.iter_rows(min_row=2, max_row=4, min_col=1, max_col=3):
for cell in row:
print(cell.value, end=' ')
print()
3.2 单元格样式(字体、颜色、边框)
from openpyxl import Workbook
from openpyxl.styles import Font, Fill, PatternFill, Border, Side, Alignment
wb = Workbook()
ws = wb.active
# 输入数据
headers = ['产品名称', '数量', '单价', '总金额']
for col, header in enumerate(headers, 1):
ws.cell(row=1, column=col, value=header)
# 字体样式
bold_font = Font(bold=True, size=12, color='FFFFFF')
normal_font = Font(size=11)
# 背景色
header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
alt_fill = PatternFill(start_color='D9E2F3', end_color='D9E2F3', fill_type='solid')
# 边框
thin_border = Border(
left=Side(style='thin'),
right=Side(style='thin'),
top=Side(style='thin'),
bottom=Side(style='thin')
)
# 对齐
center_align = Alignment(horizontal='center', vertical='center')
right_align = Alignment(horizontal='right')
# 应用表头样式
for col in range(1, 5):
cell = ws.cell(row=1, column=col)
cell.font = bold_font
cell.fill = header_fill
cell.border = thin_border
cell.alignment = center_align
# 数据行样式
data = [
['笔记本电脑', 10, 1200000],
['鼠标', 50, 25000],
['键盘', 30, 45000]
]
for row_idx, row_data in enumerate(data, 2):
for col_idx, value in enumerate(row_data, 1):
cell = ws.cell(row=row_idx, column=col_idx, value=value)
cell.border = thin_border
if row_idx % 2 == 0:
cell.fill = alt_fill
# 调整列宽
ws.column_dimensions['A'].width = 15
ws.column_dimensions['B'].width = 10
ws.column_dimensions['C'].width = 15
ws.column_dimensions['D'].width = 15
# 调整行高
ws.row_dimensions[1].height = 25
wb.save('styled_sales.xlsx')
3.3 插入公式
from openpyxl import Workbook
wb = Workbook()
ws = wb.active
# 输入数据
ws['A1'] = '数量'
ws['B1'] = '单价'
ws['C1'] = '总金额'
data = [
[10, 1200000],
[50, 25000],
[30, 45000]
]
for row_idx, (qty, price) in enumerate(data, 2):
ws.cell(row=row_idx, column=1, value=qty)
ws.cell(row=row_idx, column=2, value=price)
# 插入公式(总金额 = 数量 * 单价)
ws.cell(row=row_idx, column=3, value=f'=A{row_idx}*B{row_idx}')
# 合计公式
last_row = len(data) + 1
ws.cell(row=last_row + 1, column=1, value='合计')
ws.cell(row=last_row + 1, column=3, value=f'=SUM(C2:C{last_row})')
# 条件公式
ws['D1'] = '等级'
for row_idx in range(2, last_row + 1):
ws.cell(row=row_idx, column=4,
value=f'=IF(C{row_idx}>=10000000,"A",IF(C{row_idx}>=1000000,"B","C"))')
wb.save('formulas.xlsx')
4. 创建图表
使用openpyxl可以通过编程方式在Excel中创建各种图表。
from openpyxl import Workbook
from openpyxl.chart import BarChart, LineChart, PieChart, Reference
wb = Workbook()
ws = wb.active
# 输入数据
data = [
['月份', '销售额', '成本', '利润'],
['1月', 5000000, 3000000, 2000000],
['2月', 6000000, 3500000, 2500000],
['3月', 5500000, 3200000, 2300000],
['4月', 7000000, 4000000, 3000000],
['5月', 8000000, 4500000, 3500000],
['6月', 7500000, 4200000, 3300000]
]
for row in data:
ws.append(row)
# 柱状图
bar_chart = BarChart()
bar_chart.type = "col" # 垂直柱状图
bar_chart.style = 10
bar_chart.title = "月度销售情况"
bar_chart.y_axis.title = "金额(元)"
bar_chart.x_axis.title = "月份"
# 指定数据范围
data_ref = Reference(ws, min_col=2, min_row=1, max_col=4, max_row=7)
categories = Reference(ws, min_col=1, min_row=2, max_row=7)
bar_chart.add_data(data_ref, titles_from_data=True)
bar_chart.set_categories(categories)
bar_chart.shape = 4 # 图表样式
ws.add_chart(bar_chart, "F2") # 图表位置
# 折线图
line_chart = LineChart()
line_chart.title = "月度趋势"
line_chart.style = 13
line_chart.y_axis.title = "金额"
line_chart.x_axis.title = "月份"
line_chart.add_data(data_ref, titles_from_data=True)
line_chart.set_categories(categories)
ws.add_chart(line_chart, "F18")
# 饼图(仅6月数据)
pie_chart = PieChart()
pie_chart.title = "6月成本结构"
labels = Reference(ws, min_col=2, max_col=4, min_row=1)
pie_data = Reference(ws, min_col=2, max_col=4, min_row=7)
pie_chart.add_data(pie_data, titles_from_data=False)
pie_chart.set_categories(labels)
ws.add_chart(pie_chart, "N2")
wb.save('charts.xlsx')
5. 处理多个工作表
from openpyxl import Workbook, load_workbook
# 创建包含多个工作表的新工作簿
wb = Workbook()
# 第一个工作表(默认工作表)
ws1 = wb.active
ws1.title = "摘要"
# 创建额外的工作表
ws2 = wb.create_sheet("第一季度")
ws3 = wb.create_sheet("第二季度")
ws4 = wb.create_sheet("第三季度", 0) # 插入到最前面
# 每个工作表输入数据
quarters = {
'第一季度': [['1月', 100], ['2月', 120], ['3月', 110]],
'第二季度': [['4月', 130], ['5月', 140], ['6月', 135]],
'第三季度': [['7月', 150], ['8月', 160], ['9月', 155]]
}
for sheet_name, data in quarters.items():
ws = wb[sheet_name]
ws.append(['月份', '销售额'])
for row in data:
ws.append(row)
# 在摘要工作表中引用各季度合计
summary = wb['摘要']
summary.append(['季度', '总销售额'])
summary.append(['第一季度', '=SUM(\'第一季度\'!B2:B4)'])
summary.append(['第二季度', '=SUM(\'第二季度\'!B2:B4)'])
summary.append(['第三季度', '=SUM(\'第三季度\'!B2:B4)'])
# 更改工作表顺序
wb._sheets = [wb['摘要'], wb['第一季度'], wb['第二季度'], wb['第三季度']]
# 复制工作表
source = wb['第一季度']
target = wb.copy_worksheet(source)
target.title = "第一季度_备份"
# 删除工作表
# del wb['第一季度_备份']
wb.save('multi_sheets.xlsx')
# 遍历现有文件的所有工作表
wb = load_workbook('multi_sheets.xlsx')
for sheet_name in wb.sheetnames:
ws = wb[sheet_name]
print(f"工作表: {sheet_name}")
for row in ws.iter_rows(values_only=True):
print(row)
6. 合并Excel文件
import pandas as pd
from openpyxl import load_workbook
import glob
# 使用pandas合并Excel文件
def merge_excel_files(file_pattern, output_file):
"""将多个Excel文件合并为一个"""
excel_files = glob.glob(file_pattern)
df_list = []
for file in excel_files:
df = pd.read_excel(file)
df['源文件'] = file # 标记来源
df_list.append(df)
combined_df = pd.concat(df_list, ignore_index=True)
combined_df.to_excel(output_file, index=False)
print(f"合并完成: {len(excel_files)}个文件 -> {output_file}")
merge_excel_files('reports/*.xlsx', 'combined_report.xlsx')
# 将多个工作表合并为一个工作表
def merge_sheets(input_file, output_file):
"""将一个Excel文件中的所有工作表合并为一个"""
xl = pd.ExcelFile(input_file)
df_list = []
for sheet_name in xl.sheet_names:
df = pd.read_excel(xl, sheet_name=sheet_name)
df['工作表名'] = sheet_name
df_list.append(df)
combined_df = pd.concat(df_list, ignore_index=True)
combined_df.to_excel(output_file, index=False)
merge_sheets('multi_sheets.xlsx', 'merged_single_sheet.xlsx')
# 条件合并(基于特定列)
def conditional_merge(files, key_column, output_file):
"""基于特定键列合并多个文件"""
base_df = pd.read_excel(files[0])
for file in files[1:]:
df = pd.read_excel(file)
base_df = pd.merge(base_df, df, on=key_column, how='outer')
base_df.to_excel(output_file, index=False)
conditional_merge(['customers.xlsx', 'orders.xlsx', 'payments.xlsx'],
'客户ID', 'customer_full_data.xlsx')
7. 大文件处理(分块)
处理数十万行以上的大文件时,需要考虑内存效率。
import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
# 分块处理大型CSV
def process_large_csv(input_file, output_file, chunk_size=10000):
"""分块处理大型CSV"""
processed_chunks = []
for chunk in pd.read_csv(input_file, chunksize=chunk_size):
# 处理每个块
chunk['已处理'] = True
chunk['总金额'] = chunk['数量'] * chunk['单价']
processed_chunks.append(chunk)
print(f"已处理行数: {len(chunk)}")
# 保存结果
result_df = pd.concat(processed_chunks, ignore_index=True)
result_df.to_csv(output_file, index=False)
# 分块筛选并保存(节省内存)
def filter_large_csv(input_file, output_file, condition_func, chunk_size=10000):
"""从大型CSV中仅提取符合条件的行"""
first_chunk = True
for chunk in pd.read_csv(input_file, chunksize=chunk_size):
filtered = chunk[condition_func(chunk)]
if first_chunk:
filtered.to_csv(output_file, index=False, mode='w')
first_chunk = False
else:
filtered.to_csv(output_file, index=False, mode='a', header=False)
# 使用示例:仅提取总金额100万元以上的行
filter_large_csv(
'large_sales.csv',
'high_value_sales.csv',
lambda df: df['总金额'] >= 1000000
)
# 读取大型Excel文件(read_only模式)
def read_large_excel(file_path):
"""高效读取大型Excel文件"""
wb = load_workbook(file_path, read_only=True)
ws = wb.active
for row in ws.iter_rows(values_only=True):
yield row # 作为生成器逐行返回
wb.close()
# 写入大型Excel文件(write_only模式)
from openpyxl import Workbook
def write_large_excel(data_generator, output_file):
"""高效写入大量数据到Excel"""
wb = Workbook(write_only=True)
ws = wb.create_sheet()
for row in data_generator:
ws.append(row)
wb.save(output_file)
8. 实战:自动生成月度报表
现在让我们综合所学内容,创建一个可以在实际工作中使用的月度销售报表自动生成系统。
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment
from openpyxl.chart import BarChart, LineChart, PieChart, Reference
from openpyxl.utils.dataframe import dataframe_to_rows
from datetime import datetime
import os
class MonthlyReportGenerator:
"""月度销售报表自动生成类"""
def __init__(self, data_file, output_dir='reports'):
self.data_file = data_file
self.output_dir = output_dir
self.df = None
self.wb = None
# 创建输出目录
os.makedirs(output_dir, exist_ok=True)
# 定义样式
self.header_font = Font(bold=True, size=11, color='FFFFFF')
self.header_fill = PatternFill(start_color='2F5496', end_color='2F5496', fill_type='solid')
self.border = Border(
left=Side(style='thin'),
right=Side(style='thin'),
top=Side(style='thin'),
bottom=Side(style='thin')
)
self.center_align = Alignment(horizontal='center', vertical='center')
self.number_format = '#,##0'
self.currency_format = '#,##0"元"'
def load_data(self):
"""加载数据"""
self.df = pd.read_csv(self.data_file, parse_dates=['订单日期'])
self.df['月份'] = self.df['订单日期'].dt.to_period('M')
print(f"数据加载完成: {len(self.df)}行")
def create_summary_sheet(self, ws):
"""创建摘要工作表"""
ws.title = "摘要"
# 标题
ws['A1'] = '月度销售报表'
ws['A1'].font = Font(bold=True, size=16)
ws.merge_cells('A1:E1')
# 生成时间
ws['A2'] = f'生成时间: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}'
ws['A2'].font = Font(italic=True, size=10)
# 月度汇总数据
monthly_summary = self.df.groupby('月份').agg({
'订单号': 'count',
'数量': 'sum',
'总金额': 'sum'
}).reset_index()
monthly_summary.columns = ['月份', '订单数', '销售数量', '总销售额']
# 表格起始位置
start_row = 4
headers = ['月份', '订单数', '销售数量', '总销售额']
# 写入表头
for col, header in enumerate(headers, 1):
cell = ws.cell(row=start_row, column=col, value=header)
cell.font = self.header_font
cell.fill = self.header_fill
cell.border = self.border
cell.alignment = self.center_align
# 写入数据
for row_idx, row in monthly_summary.iterrows():
ws.cell(row=start_row + row_idx + 1, column=1, value=str(row['月份']))
ws.cell(row=start_row + row_idx + 1, column=2, value=row['订单数'])
ws.cell(row=start_row + row_idx + 1, column=3, value=row['销售数量'])
cell = ws.cell(row=start_row + row_idx + 1, column=4, value=row['总销售额'])
cell.number_format = self.currency_format
for col in range(1, 5):
ws.cell(row=start_row + row_idx + 1, column=col).border = self.border
# 合计行
total_row = start_row + len(monthly_summary) + 1
ws.cell(row=total_row, column=1, value='合计').font = Font(bold=True)
ws.cell(row=total_row, column=2, value=f'=SUM(B{start_row+1}:B{total_row-1})')
ws.cell(row=total_row, column=3, value=f'=SUM(C{start_row+1}:C{total_row-1})')
ws.cell(row=total_row, column=4, value=f'=SUM(D{start_row+1}:D{total_row-1})')
ws.cell(row=total_row, column=4).number_format = self.currency_format
for col in range(1, 5):
ws.cell(row=total_row, column=col).border = self.border
# 调整列宽
ws.column_dimensions['A'].width = 12
ws.column_dimensions['B'].width = 12
ws.column_dimensions['C'].width = 12
ws.column_dimensions['D'].width = 18
# 添加图表
chart = BarChart()
chart.type = "col"
chart.style = 10
chart.title = "月度销售情况"
chart.y_axis.title = "销售额(元)"
data = Reference(ws, min_col=4, min_row=start_row,
max_row=start_row + len(monthly_summary))
cats = Reference(ws, min_col=1, min_row=start_row + 1,
max_row=start_row + len(monthly_summary))
chart.add_data(data, titles_from_data=True)
chart.set_categories(cats)
chart.shape = 4
ws.add_chart(chart, "F4")
return monthly_summary
def create_category_sheet(self, ws):
"""创建类别分析工作表"""
ws.title = "按类别"
# 类别汇总
category_summary = self.df.groupby('类别').agg({
'订单号': 'count',
'数量': 'sum',
'总金额': 'sum'
}).reset_index()
category_summary.columns = ['类别', '订单数', '销售数量', '总销售额']
category_summary = category_summary.sort_values('总销售额', ascending=False)
# 写入表格
headers = ['类别', '订单数', '销售数量', '总销售额', '占比']
for col, header in enumerate(headers, 1):
cell = ws.cell(row=1, column=col, value=header)
cell.font = self.header_font
cell.fill = self.header_fill
cell.border = self.border
total_sales = category_summary['总销售额'].sum()
for row_idx, row in category_summary.reset_index(drop=True).iterrows():
ws.cell(row=row_idx + 2, column=1, value=row['类别'])
ws.cell(row=row_idx + 2, column=2, value=row['订单数'])
ws.cell(row=row_idx + 2, column=3, value=row['销售数量'])
ws.cell(row=row_idx + 2, column=4, value=row['总销售额']).number_format = self.currency_format
share_cell = ws.cell(row=row_idx + 2, column=5, value=row['总销售额'] / total_sales)
share_cell.number_format = '0.0%'
for col in range(1, 6):
ws.cell(row=row_idx + 2, column=col).border = self.border
# 调整列宽
ws.column_dimensions['A'].width = 15
ws.column_dimensions['D'].width = 18
# 饼图
pie = PieChart()
pie.title = "类别销售占比"
labels = Reference(ws, min_col=1, min_row=2, max_row=len(category_summary) + 1)
data = Reference(ws, min_col=4, min_row=1, max_row=len(category_summary) + 1)
pie.add_data(data, titles_from_data=True)
pie.set_categories(labels)
ws.add_chart(pie, "G2")
def create_detail_sheet(self, ws):
"""创建详细数据工作表"""
ws.title = "详细数据"
# 将数据框写入工作表
columns = ['订单号', '订单日期', '客户名', '产品名', '类别', '数量', '单价', '总金额']
df_subset = self.df[columns].copy()
df_subset['订单日期'] = df_subset['订单日期'].dt.strftime('%Y-%m-%d')
# 表头
for col, header in enumerate(columns, 1):
cell = ws.cell(row=1, column=col, value=header)
cell.font = self.header_font
cell.fill = self.header_fill
cell.border = self.border
# 数据
for row_idx, row in df_subset.iterrows():
for col_idx, col in enumerate(columns, 1):
cell = ws.cell(row=row_idx + 2, column=col_idx, value=row[col])
cell.border = self.border
if col in ['数量', '单价', '总金额']:
cell.number_format = self.number_format
# 列宽
widths = [12, 12, 12, 20, 12, 8, 12, 15]
for col_idx, width in enumerate(widths, 1):
ws.column_dimensions[chr(64 + col_idx)].width = width
# 应用筛选
ws.auto_filter.ref = f"A1:H{len(df_subset) + 1}"
def generate_report(self):
"""生成报表主函数"""
print("开始生成报表...")
# 加载数据
self.load_data()
# 创建工作簿
self.wb = Workbook()
# 创建工作表
ws_summary = self.wb.active
self.create_summary_sheet(ws_summary)
ws_category = self.wb.create_sheet()
self.create_category_sheet(ws_category)
ws_detail = self.wb.create_sheet()
self.create_detail_sheet(ws_detail)
# 保存文件
today = datetime.now().strftime('%Y%m')
output_file = os.path.join(self.output_dir, f'月度报表_{today}.xlsx')
self.wb.save(output_file)
print(f"报表生成完成: {output_file}")
return output_file
# 使用示例
if __name__ == "__main__":
# 生成示例数据(实际使用现有CSV)
import random
from datetime import timedelta
# 生成示例数据
categories = ['电子产品', '服装', '食品', '家具', '图书']
products = {
'电子产品': ['笔记本电脑', '智能手机', '平板电脑', '耳机'],
'服装': ['T恤', '牛仔裤', '夹克', '大衣'],
'食品': ['零食', '饮料', '方便面', '咖啡'],
'家具': ['书桌', '椅子', '床', '沙发'],
'图书': ['小说', '自助书籍', '漫画', '技术书籍']
}
data = []
base_date = datetime(2025, 1, 1)
for i in range(500):
category = random.choice(categories)
product = random.choice(products[category])
qty = random.randint(1, 10)
price = random.randint(10000, 500000)
data.append({
'订单号': f'ORD{i+1:05d}',
'订单日期': base_date + timedelta(days=random.randint(0, 180)),
'客户名': f'客户{random.randint(1, 100):03d}',
'产品名': product,
'类别': category,
'数量': qty,
'单价': price,
'总金额': qty * price
})
sample_df = pd.DataFrame(data)
sample_df.to_csv('sample_sales_data.csv', index=False, encoding='utf-8-sig')
# 生成报表
generator = MonthlyReportGenerator('sample_sales_data.csv')
generator.generate_report()
总结
在本篇中,我们学习了使用Python进行Excel和CSV文件自动化的核心技术。
- csv模块:适合简单的CSV文件读写
- pandas:为数据分析和预处理提供强大功能
- openpyxl:专门用于创建Excel文件、设置样式和创建图表
在实际工作中,这三者会根据情况组合使用。通常使用pandas进行简单的数据处理,最终报表则使用openpyxl进行样式美化。
下一篇第6篇将介绍邮件和通知自动化。我们将学习如何将今天创建的报表通过邮件自动发送,或通过Slack/Telegram发送通知。