luban/src/Luban.Job.Cfg/Source/DataSources/Excel/Sheet.cs

661 lines
22 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

using ExcelDataReader;
using Luban.Job.Cfg.Datas;
using Luban.Job.Cfg.TypeVisitors;
using Luban.Job.Common.Types;
using System;
using System.Collections.Generic;
using System.Linq;
namespace Luban.Job.Cfg.DataSources.Excel
{
class Sheet
{
private static readonly NLog.Logger s_logger = NLog.LogManager.GetCurrentClassLogger();
// Set from the "row" meta attribute; when false the loaded rows are transposed before use.
private bool OrientRow { get; set; } = true; // read data row by row
// Set from the "align" meta attribute; Load rejects sheets where this is false.
private bool Align { get; set; } = true;// fixed-column format: title headers are strictly aligned with the data
// Set from the "multi_rows" meta attribute; when true ReadOne groups several rows into one record.
private bool IsMultiRow { get; set; }
// Set from the "title_rows" meta attribute (accepted range is [1,10]).
private int TitleRows { get; set; } = 3; // by default three rows are title rows: field names, Chinese description, comments
public string Name { get; }
// Rows remaining after loading (header rows and non-exported rows are removed by LoadRemainRows).
private List<List<Cell>> _rowColumns;
// Root of the title tree built by LoadRemainRows.
private Title _rootTitle;
// When false, rows whose export flag is "test"/"测试" are dropped by NotExport.
private bool ExportTestData { get; }
/// <summary>
/// A node of the header tree: a named span of cell indices that may own nested sub titles.
/// </summary>
public class Title
{
    /// <summary>Index of the first cell covered by this title.</summary>
    public int FromIndex { get; set; }

    /// <summary>Index of the last cell covered by this title (inclusive).</summary>
    public int ToIndex { get; set; }

    public string Name { get; set; }

    /// <summary>Sub titles keyed by name.</summary>
    public Dictionary<string, Title> SubTitles { get; set; } = new Dictionary<string, Title>();

    /// <summary>The same sub titles as <see cref="SubTitles"/>, kept as a list so they can be ordered.</summary>
    public List<Title> SubTitleList { get; set; } = new List<Title>();

    /// <summary>Registers a child title; throws when a child with the same name already exists.</summary>
    public void AddSubTitle(Title title)
    {
        if (SubTitles.TryAdd(title.Name, title))
        {
            SubTitleList.Add(title);
            return;
        }
        throw new Exception($"标题:{title.Name} 重复");
    }

    /// <summary>
    /// Recursively orders the sub titles by their starting index.
    /// Merged headers are processed before single-cell headers, so the list is not
    /// necessarily ordered after construction. Most data is unaffected, but multi-level
    /// headers of list types could otherwise yield elements in the wrong order.
    /// </summary>
    public void SortSubTitles()
    {
        SubTitleList.Sort((left, right) => left.FromIndex - right.FromIndex);
        SubTitleList.ForEach(child => child.SortSubTitles());
    }

    public override string ToString()
    {
        var children = string.Join(",\\n", SubTitleList);
        return $"name:{Name} [{FromIndex}, {ToIndex}] sub titles:[{children}]";
    }
}
/// <summary>
/// A single cell value together with the position it was read from.
/// </summary>
public struct Cell
{
    public Cell(int row, int column, object value)
    {
        this.Row = row;
        this.Column = column;
        this.Value = value;
    }

    /// <summary>Row index, starting at 1.</summary>
    public int Row { get; } // starts at 1

    /// <summary>Column index, starting at 0.</summary>
    public int Column { get; } // starts at 0; consider changing it?

    /// <summary>Raw cell value; may be null.</summary>
    public object Value { get; }

    /// <summary>
    /// Converts a zero-based column index into spreadsheet column letters
    /// (0 -> "A", 25 -> "Z", 26 -> "AA", 702 -> "AAA").
    /// Works for any number of letters; a negative index yields an empty string.
    /// </summary>
    private static string ToAlphaString(int column)
    {
        string letters = "";
        // Bijective base-26: peel off the lowest letter, then step to the next "digit".
        for (int n = column; n >= 0; n = n / 26 - 1)
        {
            letters = (char)('A' + n % 26) + letters;
        }
        return letters;
    }

    /// <summary>Formats the cell as "[COLUMN:ROW] value"; the printed row number is Row + 1.</summary>
    public override string ToString()
    {
        return $"[{ToAlphaString(Column)}:{Row + 1}] {Value}";
    }
}
/// <summary>
/// One record viewed through a title: one or more physical rows whose cells
/// are addressed by the names of the title's sub titles.
/// </summary>
public class NamedRow
{
    /// <summary>The title whose span this record covers.</summary>
    public Title SelfTitle { get; }

    /// <summary>The physical rows that make up this record.</summary>
    public List<List<Cell>> Rows { get; }

    public Dictionary<string, Title> Titles => SelfTitle.SubTitles;

    public List<Title> TitleList => SelfTitle.SubTitleList;

    public int RowCount => Rows.Count;

    /// <summary>Creates a record made of a single row.</summary>
    public NamedRow(Title selfTitle, List<Cell> row)
    {
        SelfTitle = selfTitle;
        Rows = new List<List<Cell>> { row };
    }

    /// <summary>Creates a record made of several rows (the list is used as-is, not copied).</summary>
    public NamedRow(Title selfTitle, List<List<Cell>> rows)
    {
        SelfTitle = selfTitle;
        Rows = rows;
    }

    /// <summary>
    /// Ensures that a single-row field has no values in any row after the first one;
    /// throws when a later row is not blank within [fromIndex, toIndex].
    /// </summary>
    private void CheckEmptySinceSecondRow(string name, int fromIndex, int toIndex)
    {
        foreach (var extraRow in Rows.Skip(1))
        {
            if (IsBlankRow(extraRow, fromIndex, toIndex))
            {
                continue;
            }
            throw new Exception($"字段:{name} 不是多行字段,只能第一行填值. {Bright.Common.StringUtil.CollectionToString(extraRow)}");
        }
    }

    /// <summary>Returns the sub title with the given name, or null when there is none.</summary>
    public Title GetTitle(string name)
    {
        if (!Titles.TryGetValue(name, out var found))
        {
            return null;
        }
        return found;
    }

    /// <summary>
    /// Returns a stream over the first row's cells that belong to the named column.
    /// Throws when the column does not exist or when later rows hold values for it.
    /// </summary>
    public ExcelStream GetColumn(string name, string sep)
    {
        if (!Titles.TryGetValue(name, out var column))
        {
            throw new Exception($"单元薄 缺失 列:{name},请检查是否写错或者遗漏");
        }
        CheckEmptySinceSecondRow(name, column.FromIndex, column.ToIndex);
        return new ExcelStream(Rows[0], column.FromIndex, column.ToIndex, sep);
    }

    /// <summary>
    /// Returns the first row re-scoped to the named sub title.
    /// The dictionary indexer is used, so an unknown name raises its lookup exception.
    /// </summary>
    public NamedRow GetSubTitleNamedRow(string name)
    {
        var subTitle = Titles[name];
        CheckEmptySinceSecondRow(name, subTitle.FromIndex, subTitle.ToIndex);
        return new NamedRow(subTitle, Rows[0]);
    }

    /// <summary>Returns all rows re-scoped to the named sub title.</summary>
    public NamedRow GetSubTitleNamedRowOfMultiRows(string name)
    {
        var subTitle = Titles[name];
        return new NamedRow(subTitle, Rows);
    }

    /// <summary>
    /// Lazily yields one single-row record per non-blank row. Blankness is checked
    /// within this title's span, or over the whole row when there is no title.
    /// </summary>
    public IEnumerable<NamedRow> GenerateSubNameRows()
    {
        foreach (var row in Rows)
        {
            bool blank;
            if (SelfTitle != null)
            {
                blank = IsBlankRow(row, SelfTitle.FromIndex, SelfTitle.ToIndex);
            }
            else
            {
                blank = IsBlankRow(row);
            }

            if (!blank)
            {
                yield return new NamedRow(SelfTitle, row);
            }
        }
    }

    /// <summary>
    /// Lazily yields one stream per row that has a value in the named column.
    /// Because this is an iterator, the missing-column exception is raised on first enumeration.
    /// </summary>
    public IEnumerable<ExcelStream> GetColumnOfMultiRows(string name, string sep)
    {
        if (!Titles.TryGetValue(name, out var column))
        {
            throw new Exception($"单元薄 缺失 列:{name},请检查是否写错或者遗漏");
        }

        foreach (var row in Rows)
        {
            if (!IsBlankRow(row, column.FromIndex, column.ToIndex))
            {
                yield return new ExcelStream(row, column.FromIndex, column.ToIndex, sep);
            }
        }
    }
}
/// <summary>
/// Creates an unloaded sheet.
/// </summary>
/// <param name="name">Sheet name.</param>
/// <param name="exportTestData">Whether rows flagged as test data are kept.</param>
public Sheet(string name, bool exportTestData)
{
    Name = name;
    ExportTestData = exportTestData;
}
/// <summary>
/// Loads the sheet from the reader: parses the meta row, then the remaining rows.
/// </summary>
/// <returns>false when the meta row is missing or marks the sheet as ignored; true once loaded.</returns>
/// <exception cref="Exception">The sheet declares align:false, which is not supported.</exception>
public bool Load(IExcelDataReader reader)
{
    bool usable = ParseMeta(reader);
    if (!usable)
    {
        return false;
    }

    s_logger.Trace("align:{align} row:{orient}", Align, OrientRow);
    if (Align)
    {
        LoadRemainRows(reader);
        return true;
    }

    throw new Exception($"当前不支持 align:false");
}
/// <summary>
/// Reads the first row of the sheet as the meta row and applies its attributes.
/// The meta row must have "##" in its first cell, followed by cells of the form key:value.
/// Supported keys: align, row, multi_rows, title_rows, ignore (keys are case-insensitive).
/// </summary>
/// <returns>
/// false when there is no row, the row is not a meta row, or ignore:true is present;
/// true otherwise.
/// </returns>
/// <exception cref="Exception">An attribute is malformed, unknown, or has an invalid value.</exception>
private bool ParseMeta(IExcelDataReader reader)
{
    if (!reader.Read() || reader.FieldCount == 0)
    {
        return false;
    }

    // Read through GetValue + ToString rather than GetString: a non-text cell (number, bool)
    // then produces a normal string instead of a cast failure inside the reader.
    if (reader.GetValue(0)?.ToString() != "##")
    {
        return false;
    }

    // Shared parser for the boolean attributes; the message matches the attribute name.
    bool ParseBool(string name, string text)
    {
        if (!bool.TryParse(text, out var parsed))
        {
            throw new Exception($"单元薄 meta 定义 {name}:{text} 属性值只能为true或false");
        }
        return parsed;
    }

    for (int i = 1, n = reader.FieldCount; i < n; i++)
    {
        var attr = reader.GetValue(i)?.ToString();
        if (string.IsNullOrWhiteSpace(attr))
        {
            continue;
        }
        var ss = attr.Split(':');
        if (ss.Length != 2)
        {
            throw new Exception($"单元薄 meta 定义出错. attribute:{attr}");
        }

        // Invariant lower-casing: culture-sensitive ToLower() maps 'I' to a dotless 'ı'
        // under Turkish-style cultures, which would make "ALIGN"/"IGNORE" unrecognizable.
        // Trim so that padding around the key does not turn it into an unknown attribute.
        string key = ss[0].Trim().ToLowerInvariant();
        string value = ss[1];
        switch (key)
        {
            case "align":
            {
                Align = ParseBool(key, value);
                break;
            }
            case "row":
            {
                OrientRow = ParseBool(key, value);
                break;
            }
            case "multi_rows":
            {
                IsMultiRow = ParseBool(key, value);
                break;
            }
            case "title_rows":
            {
                if (!int.TryParse(value, out var v))
                {
                    throw new Exception($"单元薄 meta 定义 title_rows:{value} 属性值只能为整数[1,10]");
                }
                if (v < 1 || v > 10)
                {
                    throw new Exception($"单元薄 title_rows 应该在 [1,10] 范围内,默认是3");
                }
                TitleRows = v;
                break;
            }
            case "ignore":
            {
                // An ignored sheet is reported the same way as a sheet without a meta row.
                if (ParseBool(key, value))
                {
                    return false;
                }
                break;
            }
            default:
            {
                throw new Exception($"非法单元薄 meta 属性定义 {attr}");
            }
        }
    }
    return true;
}
/// <summary>
/// Decides from the export flag in the first cell whether a row must be dropped.
/// </summary>
/// <param name="row">The row to inspect.</param>
/// <returns>
/// true when the row has no cells, is flagged "false"/"否", or is flagged
/// "test"/"测试" while test data is not exported; false otherwise.
/// </returns>
/// <exception cref="Exception">The flag is not one of the supported values.</exception>
private bool NotExport(List<Cell> row)
{
    if (row.Count == 0)
    {
        return true;
    }

    // Flags are matched case-insensitively; invariant casing keeps this culture-independent.
    string exportFlag = row[0].Value?.ToString()?.Trim().ToLowerInvariant();

    // A missing flag means "export". An empty or whitespace-only cell is treated the same
    // way instead of being rejected as an unsupported flag.
    if (string.IsNullOrEmpty(exportFlag))
    {
        return false;
    }

    switch (exportFlag)
    {
        case "false":
        case "否": return true;
        case "true":
        case "是": return false;
        case "test":
        case "测试":
        {
            if (!ExportTestData)
            {
                s_logger.Debug("忽略测试数据. row:{row}", row);
            }
            return !ExportTestData;
        }
        default: throw new Exception($"不支持的excel 导出标记: {exportFlag}");
    }
}
/// <summary>
/// Recursively builds the sub titles of <paramref name="parentTitle"/> from header row
/// <paramref name="depth"/>, restricted to the cells [fromColumn, toColumn].
/// Merged cells are handled first, then the remaining single cells.
/// </summary>
/// <param name="parentTitle">Title that receives the sub titles.</param>
/// <param name="rows">Rows to read header names from; rows[depth] is the row inspected here.</param>
/// <param name="mergeCells">Merged ranges of the sheet.</param>
/// <param name="maxDepth">Number of header rows; recursion stops when depth + 1 reaches it.</param>
/// <param name="depth">Index into <paramref name="rows"/> of the header row being processed.</param>
/// <param name="fromColumn">First cell index of the parent span.</param>
/// <param name="toColumn">Last cell index of the parent span (inclusive).</param>
private void InitSubTitles(Title parentTitle, List<List<Cell>> rows, CellRange[] mergeCells, int maxDepth, int depth, int fromColumn, int toColumn)
{
    List<Cell> headerCells = rows[depth];
    bool hasDeeperLevel = depth + 1 < maxDepth;

    // Pass 1: merged ranges that start on this header row and lie inside the parent span.
    // The range row is compared against depth + 1 (presumably because merge-cell rows
    // also count the meta row, which `rows` does not contain — verify against the caller).
    foreach (var range in mergeCells)
    {
        bool startsOnThisRow = range.FromRow == depth + 1;
        bool insideParent = range.FromColumn >= fromColumn && range.ToColumn <= toColumn;
        if (!startsOnThisRow || !insideParent)
        {
            continue;
        }

        string mergedName = headerCells[range.FromColumn].Value?.ToString().Trim();
        if (string.IsNullOrWhiteSpace(mergedName))
        {
            continue;
        }

        var mergedTitle = new Title() { Name = mergedName, FromIndex = range.FromColumn, ToIndex = range.ToColumn };
        if (hasDeeperLevel)
        {
            InitSubTitles(mergedTitle, rows, mergeCells, maxDepth, depth + 1, range.FromColumn, range.ToColumn);
        }
        parentTitle.AddSubTitle(mergedTitle);
    }

    // Pass 2: every named cell in the span that pass 1 did not already register.
    int lastColumn = Math.Min(toColumn, headerCells.Count - 1);
    for (int column = fromColumn; column <= lastColumn; column++)
    {
        var cellName = headerCells[column].Value?.ToString()?.Trim();
        if (string.IsNullOrWhiteSpace(cellName))
        {
            continue;
        }

        if (parentTitle.SubTitles.TryGetValue(cellName, out var existing))
        {
            if (existing.FromIndex == column)
            {
                // Same cell as an already registered title (the head of a merged range).
                continue;
            }
            throw new Exception($"sub title 列:{cellName} 重复");
        }

        var singleTitle = new Title() { Name = cellName, FromIndex = column, ToIndex = column };
        if (hasDeeperLevel)
        {
            InitSubTitles(singleTitle, rows, mergeCells, maxDepth, depth + 1, column, column);
        }
        parentTitle.AddSubTitle(singleTitle);
    }
}
/// <summary>
/// Reads every row after the meta row, transposes them when the sheet is column-oriented,
/// builds the title tree from the header rows, and finally removes the header rows and
/// the rows that must not be exported.
/// </summary>
/// <exception cref="Exception">
/// The sheet has no title row, defines no valid column, or defines a column name twice.
/// </exception>
private void LoadRemainRows(IExcelDataReader reader)
{
    // TODO: optimize performance. Ideas:
    // 1. do not load columns that have no title
    // 2. skip blank rows early
    // 3. skip null or empty cells
    List<List<Cell>> rows = ReadAllRows(reader);

    // Check for "no rows" before anything calls Max() on the list: Max() on an empty
    // sequence throws InvalidOperationException and would hide the intended message.
    if (rows.Count == 0)
    {
        throw new Exception($"没有定义字段名行");
    }

    this._rowColumns = OrientRow ? rows : TransposeRows(rows);
    if (this._rowColumns.Count < 1)
    {
        throw new Exception($"没有定义字段名行");
    }

    // The root span is measured on the rows actually used (after transposition).
    _rootTitle = new Title() { Name = "_root_", FromIndex = 1, ToIndex = this._rowColumns.Max(r => r.Count) - 1 };

    int titleRowNum = 1;
    var mergeCells = reader.MergeCells;
    if (mergeCells != null)
    {
        if (OrientRow)
        {
            // A vertical merge of the first column starting at the title row fixes the
            // number of title-name rows before any title is created.
            foreach (var mergeCell in mergeCells)
            {
                if (mergeCell.FromRow == 1 && mergeCell.FromColumn == 0 && mergeCell.ToColumn == 0)
                {
                    titleRowNum = mergeCell.ToRow - mergeCell.FromRow + 1;
                }
            }
        }
        foreach (var mergeCell in mergeCells)
        {
            if (OrientRow)
            {
                if (mergeCell.FromRow == 1)
                {
                    // title row
                    titleRowNum = Math.Max(titleRowNum, mergeCell.ToRow - mergeCell.FromRow + 1);
                    var titleName = _rowColumns[0][mergeCell.FromColumn].Value?.ToString()?.Trim();
                    if (string.IsNullOrWhiteSpace(titleName))
                    {
                        continue;
                    }
                    var newTitle = new Title() { Name = titleName, FromIndex = mergeCell.FromColumn, ToIndex = mergeCell.ToColumn };
                    if (titleRowNum > 1)
                    {
                        InitSubTitles(newTitle, _rowColumns, mergeCells, titleRowNum, 1, mergeCell.FromColumn, mergeCell.ToColumn);
                    }
                    _rootTitle.AddSubTitle(newTitle);
                }
            }
            else
            {
                if (mergeCell.FromColumn <= 0 && mergeCell.ToColumn >= 0)
                {
                    // title row (a column of the original sheet)
                    var titleName = _rowColumns[0][mergeCell.FromRow - 1].Value?.ToString()?.Trim();
                    if (string.IsNullOrWhiteSpace(titleName))
                    {
                        continue;
                    }
                    _rootTitle.AddSubTitle(new Title() { Name = titleName, FromIndex = mergeCell.FromRow - 1, ToIndex = mergeCell.ToRow - 1 });
                }
            }
        }
    }

    // TODO: known bug — multi-level headers that occupy a single column are not handled.
    // Some columns are not merge cells, so they need this extra pass.
    var titleRow = _rowColumns[0];
    for (int i = 0; i < titleRow.Count; i++)
    {
        var name = titleRow[i].Value?.ToString()?.Trim();
        if (string.IsNullOrWhiteSpace(name))
        {
            continue;
        }
        if (_rootTitle.SubTitles.TryGetValue(name, out var oldTitle))
        {
            if (oldTitle.FromIndex != i)
            {
                throw new Exception($"列:{name} 重复");
            }
            // Already registered above as the head of a merged range.
            continue;
        }
        _rootTitle.AddSubTitle(new Title() { Name = name, FromIndex = i, ToIndex = i });
    }

    if (_rootTitle.SubTitleList.Count == 0)
    {
        throw new Exception($"没有定义任何有效 列");
    }
    _rootTitle.SortSubTitles();

    // Remove the header rows, then every row that is flagged as not exported.
    this._rowColumns.RemoveRange(0, Math.Min(TitleRows + titleRowNum - 1, this._rowColumns.Count));
    this._rowColumns.RemoveAll(row => NotExport(row));
}

// Reads all remaining rows from the reader; row indices start at 1 because the meta row was already consumed.
private static List<List<Cell>> ReadAllRows(IExcelDataReader reader)
{
    var rows = new List<List<Cell>>();
    int rowIndex = 0;
    while (reader.Read())
    {
        ++rowIndex; // the first row is the meta row, skip it
        int fieldCount = reader.FieldCount;
        var row = new List<Cell>(fieldCount);
        for (int i = 0; i < fieldCount; i++)
        {
            row.Add(new Cell(rowIndex, i, reader.GetValue(i)));
        }
        rows.Add(row);
    }
    return rows;
}

// Swaps rows and columns of a non-empty row list, padding short source rows with null-valued cells.
private static List<List<Cell>> TransposeRows(List<List<Cell>> rows)
{
    int maxColumn = rows.Max(r => r.Count);
    var transposed = new List<List<Cell>>(maxColumn);
    for (int i = 0; i < maxColumn; i++)
    {
        var line = new List<Cell>(rows.Count);
        for (int j = 0; j < rows.Count; j++)
        {
            // Bounds are checked against the source row being read (rows[j]).
            var source = rows[j];
            line.Add(i < source.Count ? source[i] : new Cell(j + 1, i, null));
        }
        transposed.Add(line);
    }
    return transposed;
}
/// <summary>
/// Tells whether a row holds no data. The first cell is used by designers to comment
/// out the row, so it is ignored. A cell is blank when its value is null or a
/// whitespace-only string. A row with no cells at all is blank.
/// </summary>
public static bool IsBlankRow(List<Cell> row)
{
    // Skip(1) instead of GetRange(1, Count - 1): no copy, and no exception for an empty row.
    return row.Skip(1).All(c => c.Value == null || (c.Value is string s && string.IsNullOrWhiteSpace(s)));
}
/// <summary>
/// Tells whether a row holds no data within the cells [fromIndex, toIndex] (inclusive).
/// The range is clamped to the row; the first cell (index 0) is never inspected.
/// A cell is blank when its value is null or a whitespace-only string, the same rule
/// the single-argument overload applies.
/// </summary>
public static bool IsBlankRow(List<Cell> row, int fromIndex, int toIndex)
{
    int first = Math.Max(1, fromIndex);
    int last = Math.Min(toIndex, row.Count - 1);
    for (int i = first; i <= last; i++)
    {
        var v = row[i].Value;
        if (v != null && !(v is string s && string.IsNullOrWhiteSpace(s)))
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Advances the read cursor to the next non-blank row and returns it,
/// or returns null when the rows are exhausted.
/// </summary>
private List<Cell> GetNextRecordRow()
{
    while (curReadIndex < _rowColumns.Count)
    {
        var candidate = _rowColumns[curReadIndex];
        curReadIndex++;
        if (!IsBlankRow(candidate))
        {
            return candidate;
        }
    }
    return null;
}
/// <summary>
/// Tells whether the cell at index 1 (the main key cell) is null or whitespace,
/// i.e. the row does not start a new record.
/// </summary>
private bool HasNotMainKey(List<Cell> row)
{
    object mainKey = row[1].Value;
    return mainKey == null || string.IsNullOrWhiteSpace(mainKey.ToString());
}
/// <summary>
/// Collects the rows of the next multi-row record: the next non-blank row plus every
/// following non-blank row that has no main key. Blank rows are skipped.
/// </summary>
/// <returns>The rows of the record, or null when no non-blank row is left.</returns>
private List<List<Cell>> GetNextRecordRows()
{
    List<List<Cell>> record = null;
    while (curReadIndex < _rowColumns.Count)
    {
        var row = _rowColumns[curReadIndex];
        curReadIndex++;

        if (IsBlankRow(row))
        {
            continue;
        }

        if (record == null)
        {
            // First non-blank row starts the record.
            record = new List<List<Cell>> { row };
            continue;
        }

        if (!HasNotMainKey(row))
        {
            // This row starts the next record: un-read it and stop.
            curReadIndex--;
            break;
        }

        record.Add(row);
    }
    return record;
}
/// <summary>
/// Reads records of the given bean type until <c>ReadOne</c> returns null.
/// </summary>
/// <returns>All records read, in order; empty when there are none.</returns>
public List<DType> ReadMulti(TBean type)
{
    var datas = new List<DType>();
    DType data = ReadOne(type);
    while (data != null)
    {
        datas.Add(data);
        data = ReadOne(type);
    }
    return datas;
}
private int curReadIndex = 0;
/// <summary>
/// Reads the next record of the given bean type, taking a single row or a group of
/// rows depending on whether the sheet is a multi-row sheet.
/// </summary>
/// <returns>The record, or null when there are no more rows.</returns>
public DType ReadOne(TBean type)
{
    if (IsMultiRow)
    {
        List<List<Cell>> recordRows = GetNextRecordRows();
        if (recordRows != null)
        {
            return ExcelNamedRowDataCreator.Ins.ReadExcel(new NamedRow(_rootTitle, recordRows), type);
        }
        return null;
    }

    List<Cell> recordRow = GetNextRecordRow();
    if (recordRow != null)
    {
        return ExcelNamedRowDataCreator.Ins.ReadExcel(new NamedRow(_rootTitle, recordRow), type);
    }
    return null;
}
}
}