// File: luban/src/Luban.Job.Cfg/Source/DataSources/Excel/Sheet.cs
// Note: this file intentionally contains non-ASCII (Chinese) text in comments and error messages.

using ExcelDataReader;
using Luban.Job.Cfg.DataCreators;
using Luban.Job.Cfg.Datas;
using Luban.Job.Cfg.Defs;
using Luban.Job.Cfg.Utils;
using Luban.Job.Common.Types;
using Luban.Job.Common.Utils;
using System;
using System.Collections.Generic;
using System.Linq;
namespace Luban.Job.Cfg.DataSources.Excel
{
class Sheet
{
// Logger for this class.
private static readonly NLog.Logger s_logger = NLog.LogManager.GetCurrentClassLogger();
// Inclusive bounds accepted for the `title_rows` meta attribute (validated in ParseMeta).
private const int TITLE_MIN_ROW_NUM = 2;
private const int TITLE_MAX_ROW_NUM = 10;
// Value of TitleRows when the meta row does not specify `title_rows`.
private const int TITLE_DEFAULT_ROW_NUM = 3;
private bool IsOrientRow { get; set; } = true; // Data is read row by row (row orientation); when false the sheet is transposed on load.
public int TitleRows { get; private set; } = TITLE_DEFAULT_ROW_NUM; // Three title rows by default: row 1 is field names, row 2 is the Chinese description, row 3 is comments.
// Values passed to the constructor.
public string RawUrl { get; }
public string Name { get; }
// Rows kept after loading (header rows removed by LoadRemainRows).
private List<List<Cell>> _rowColumns;
// Root of the header tree built by LoadRemainRows.
private Title _rootTitle;
public List<Title> RootFields => _rootTitle.SubTitleList;
public List<List<Cell>> RowColumns => _rowColumns;
/// <summary>
/// A node of the header tree: a named span of columns [FromIndex, ToIndex]
/// that may own nested sub titles, indexed both by name and as a list.
/// </summary>
public class Title
{
    public bool Root { get; set; }

    public int FromIndex { get; set; }

    public int ToIndex { get; set; }

    public string Name { get; set; }

    public Dictionary<string, Title> SubTitles { get; set; } = new Dictionary<string, Title>();

    public List<Title> SubTitleList { get; set; } = new List<Title>();

    /// <summary>
    /// Registers <paramref name="title"/> as a child of this title.
    /// </summary>
    /// <exception cref="Exception">A child with the same name is already registered.</exception>
    public void AddSubTitle(Title title)
    {
        if (SubTitles.TryAdd(title.Name, title))
        {
            SubTitleList.Add(title);
            return;
        }
        throw new Exception($"标题:{title.Name} 重复");
    }

    /// <summary>
    /// Recursively orders the children by their starting column.
    /// </summary>
    /// <remarks>
    /// Merged headers are processed before single-column headers, so the
    /// children are not necessarily in column order. Most data is unaffected,
    /// but multi-level headers of list types could otherwise get their
    /// elements out of order.
    /// </remarks>
    public void SortSubTitles()
    {
        SubTitleList.Sort((left, right) => left.FromIndex - right.FromIndex);
        SubTitleList.ForEach(child => child.SortSubTitles());
    }

    public override string ToString()
    {
        string children = string.Join(",\\n", SubTitleList);
        return $"name:{Name} [{FromIndex}, {ToIndex}] sub titles:[{children}]";
    }
}
/// <summary>
/// A single cell value together with its position in the sheet.
/// </summary>
public struct Cell
{
    public Cell(int row, int column, object value)
    {
        this.Row = row;
        this.Column = column;
        this.Value = value;
    }

    /// <summary>Row index, starting from 1.</summary>
    public int Row { get; }

    /// <summary>Column index, starting from 0 (the original author considered changing this).</summary>
    public int Column { get; }

    public object Value { get; }

    /// <summary>
    /// Converts a 0-based column index to spreadsheet letters
    /// (0 -> "A", 25 -> "Z", 26 -> "AA", 702 -> "AAA").
    /// </summary>
    /// <remarks>
    /// Uses bijective base-26 so any number of letters is supported; the
    /// previous two-letter formula produced non-letter characters from
    /// column 702 onwards. A negative index yields an empty string.
    /// </remarks>
    private static string ToAlphaString(int column)
    {
        string letters = "";
        // Each step emits the least significant letter, then moves to the
        // next "digit"; the -1 accounts for there being no zero digit.
        for (int c = column; c >= 0; c = c / 26 - 1)
        {
            letters = (char)('A' + c % 26) + letters;
        }
        return letters;
    }

    /// <summary>Formats the cell as "[COLUMN_LETTERS:Row+1] Value".</summary>
    public override string ToString()
    {
        return $"[{ToAlphaString(Column)}:{Row + 1}] {Value}";
    }
}
public class NamedRow
{
/// <summary>
/// Splits <paramref name="rows"/> into records for <paramref name="bean"/>.
/// </summary>
/// <remarks>
/// For a bean that is not multi-row, every non-blank row becomes its own
/// record. For a multi-row bean, consecutive rows are grouped: a row joins
/// the current record when all of its non-multi-row, row-oriented fields are
/// blank, or (root title only, once a record has started) when those fields
/// equal the record's first row. Rows that are blank across the whole title
/// span are skipped.
/// </remarks>
public static IEnumerable<NamedRow> CreateMultiRowNamedRow(List<List<Cell>> rows, Title title, TBean bean)
{
    if (!((DefBean)bean.Bean).IsMultiRow)
    {
        foreach (var singleRow in rows.Where(r => !Sheet.IsBlankRow(r, title.FromIndex, title.ToIndex)))
        {
            yield return new NamedRow(title, singleRow);
        }
        yield break;
    }

    List<DefField> singleRowFields = bean.Bean.HierarchyFields
        .Select(f => (DefField)f)
        .Where(f => !f.IsMultiRow && f.IsRowOrient)
        .ToList();
    List<List<Cell>> pending = null;

    foreach (var current in rows)
    {
        // Skip rows that are entirely blank.
        if (Sheet.IsBlankRow(current, title.FromIndex, title.ToIndex))
        {
            continue;
        }

        // The row belongs to the record being collected when its single-row
        // fields are all blank, or exactly repeat the record's first row.
        bool continuesRecord =
            singleRowFields.All(f =>
            {
                var span = title.SubTitles[f.Name];
                return Sheet.IsBlankRow(current, span.FromIndex, span.ToIndex);
            })
            || (title.Root && pending != null && singleRowFields.All(f =>
            {
                var span = title.SubTitles[f.Name];
                return Sheet.IsSameRow(current, pending[0], span.FromIndex, span.ToIndex);
            }));

        if (continuesRecord)
        {
            if (pending == null)
            {
                pending = new List<List<Cell>>();
            }
            pending.Add(current);
            continue;
        }

        if (pending != null)
        {
            yield return new NamedRow(title, pending);
        }
        pending = new List<List<Cell>> { current };
    }

    if (pending != null)
    {
        yield return new NamedRow(title, pending);
    }
}
// Title whose column span this NamedRow covers.
public Title SelfTitle { get; }
// The physical rows making up this record (one entry for single-row records).
public List<List<Cell>> Rows { get; }
// Shortcuts to the sub titles of SelfTitle, by name and as a list.
public Dictionary<string, Title> Titles => SelfTitle.SubTitles;
public List<Title> TitleList => SelfTitle.SubTitleList;
// Creates a record consisting of a single row; the row is wrapped in a new list.
public NamedRow(Title selfTitle, List<Cell> row)
{
SelfTitle = selfTitle;
Rows = new List<List<Cell>>() { row };
}
// Creates a record from several rows; the given list is stored as-is, not copied.
public NamedRow(Title selfTitle, List<List<Cell>> rows)
{
SelfTitle = selfTitle;
Rows = rows;
}
public int RowCount => Rows.Count;
/// <summary>
/// Verifies that every row after the first is blank within
/// [<paramref name="fromIndex"/>, <paramref name="toIndex"/>].
/// </summary>
/// <exception cref="Exception">A later row holds a value in that span.</exception>
private void CheckEmptySinceSecondRow(string name, int fromIndex, int toIndex)
{
    foreach (var laterRow in Rows.Skip(1))
    {
        if (IsBlankRow(laterRow, fromIndex, toIndex))
        {
            continue;
        }
        throw new Exception($"字段:{name} 不是多行字段,只能第一行填值. {Bright.Common.StringUtil.CollectionToString(laterRow)}");
    }
}
/// <summary>
/// Looks up a sub title by name; returns null when there is none.
/// </summary>
public Title GetTitle(string name)
{
    // TryGetValue leaves the out variable at null on a miss.
    Titles.TryGetValue(name, out var found);
    return found;
}
/// <summary>
/// Builds a stream over the first row's cells for the column named <paramref name="name"/>.
/// </summary>
/// <exception cref="Exception">
/// The column is missing, or (for non-root titles) a later row has a value in it.
/// </exception>
public ExcelStream GetColumn(string name, string sep, bool namedMode)
{
    if (!Titles.TryGetValue(name, out var columnTitle))
    {
        throw new Exception($"单元薄 缺失 列:{name},请检查是否写错或者遗漏");
    }

    // Only the top-level root allows a non-multi_rows field that repeats the
    // first row to count as the same record; elsewhere later rows must be empty.
    if (!SelfTitle.Root)
    {
        CheckEmptySinceSecondRow(name, columnTitle.FromIndex, columnTitle.ToIndex);
    }

    return new ExcelStream(Rows[0], columnTitle.FromIndex, columnTitle.ToIndex, sep, namedMode);
}
/// <summary>
/// Returns a view of the same rows scoped to the sub title <paramref name="name"/>.
/// The dictionary indexer throws if the sub title does not exist.
/// </summary>
public NamedRow GetSubTitleNamedRow(string name)
{
    return new NamedRow(Titles[name], Rows);
}
/// <summary>
/// Yields one single-row NamedRow per non-blank row, keeping the same title.
/// </summary>
/// <remarks><paramref name="bean"/> is not used by this method.</remarks>
public IEnumerable<NamedRow> GenerateSubNameRows(TBean bean)
{
    foreach (var line in Rows)
    {
        bool blank;
        if (SelfTitle == null)
        {
            blank = IsBlankRow(line);
        }
        else
        {
            blank = IsBlankRow(line, SelfTitle.FromIndex, SelfTitle.ToIndex);
        }

        if (!blank)
        {
            yield return new NamedRow(SelfTitle, line);
        }
    }
}
/// <summary>
/// Yields one stream per element of a multi-row column.
/// </summary>
/// <remarks>
/// Row-oriented: one stream per row that is non-blank within the column's span.
/// Column-oriented: one stream per non-blank column of the span, built from
/// that column's non-empty cells. Because this is an iterator, the
/// missing-column exception is raised on first enumeration.
/// </remarks>
/// <exception cref="Exception">The column <paramref name="name"/> does not exist.</exception>
public IEnumerable<ExcelStream> GetColumnOfMultiRows(string name, string sep, bool isRowOrient)
{
    if (!Titles.TryGetValue(name, out var span))
    {
        throw new Exception($"单元薄 缺失 列:{name},请检查是否写错或者遗漏");
    }

    if (isRowOrient)
    {
        foreach (var line in Rows)
        {
            if (!IsBlankRow(line, span.FromIndex, span.ToIndex))
            {
                yield return new ExcelStream(line, span.FromIndex, span.ToIndex, sep, false);
            }
        }
        yield break;
    }

    for (int col = span.FromIndex; col <= span.ToIndex; col++)
    {
        if (IsBlankColumn(Rows, col))
        {
            continue;
        }

        var filled = new List<Cell>();
        foreach (var line in Rows)
        {
            if (line.Count <= col)
            {
                continue;
            }
            var cell = line[col];
            if (cell.Value == null || (cell.Value is string text && string.IsNullOrEmpty(text)))
            {
                continue;
            }
            filled.Add(cell);
        }
        yield return new ExcelStream(filled, 0, filled.Count - 1, sep, false);
    }
}
/// <summary>
/// Concatenates the non-blank cells of the column <paramref name="name"/>
/// across all rows of this record into a single stream.
/// </summary>
/// <remarks>
/// The column span is clamped to each row's length, matching how the blank-row
/// helpers treat short rows, so a row narrower than the title span no longer
/// makes <c>List.GetRange</c> throw.
/// </remarks>
/// <exception cref="Exception">The column does not exist.</exception>
/// <exception cref="NotSupportedException"><paramref name="isRowOrient"/> is false.</exception>
public ExcelStream GetMultiRowStream(string name, string sep, bool isRowOrient)
{
    if (!Titles.TryGetValue(name, out var title))
    {
        throw new Exception($"单元薄 缺失 列:{name},请检查是否写错或者遗漏");
    }
    if (!isRowOrient)
    {
        throw new NotSupportedException($"bean类型多行数据不支持纵向填写");
    }

    var totalCells = new List<Cell>();
    foreach (var row in Rows)
    {
        int last = Math.Min(title.ToIndex, row.Count - 1);
        for (int i = title.FromIndex; i <= last; i++)
        {
            var cell = row[i];
            // Keep only cells that carry a value (non-null, not whitespace-only text).
            if (cell.Value != null && !(cell.Value is string s && string.IsNullOrWhiteSpace(s)))
            {
                totalCells.Add(cell);
            }
        }
    }
    return new ExcelStream(totalCells, 0, totalCells.Count - 1, sep, false);
}
}
/// <summary>
/// Creates an empty sheet; call <c>Load</c> to populate it.
/// </summary>
/// <param name="rawUrl">Stored in <c>RawUrl</c> and attached to records read from this sheet.</param>
/// <param name="name">Stored in <c>Name</c>.</param>
public Sheet(string rawUrl, string name)
{
    RawUrl = rawUrl;
    Name = name;
}
/// <summary>
/// Reads the sheet the reader is currently positioned on.
/// </summary>
/// <returns>
/// false when the first row is not a valid meta row (the sheet is skipped);
/// true once the remaining rows have been loaded.
/// </returns>
public bool Load(IExcelDataReader reader, bool headerOnly)
{
    //s_logger.Info("read sheet:{sheet}", reader.Name);
    if (ParseMeta(reader))
    {
        LoadRemainRows(reader, headerOnly);
        return true;
    }
    return false;
}
/// <summary>
/// Reads the first row of the sheet and applies its meta attributes.
/// </summary>
/// <remarks>
/// The meta row must have "##" as its first cell, followed by cells of the
/// form key:value or key=value. Supported keys are <c>orientation</c> and
/// <c>title_rows</c>. Keys and values are trimmed and lower-cased with the
/// invariant culture so the result does not depend on the machine's locale.
/// </remarks>
/// <returns>false when the sheet has no first row, no columns, or no "##" marker.</returns>
/// <exception cref="Exception">An attribute is malformed, unknown, or out of range.</exception>
private bool ParseMeta(IExcelDataReader reader)
{
    if (!reader.Read() || reader.FieldCount == 0)
    {
        return false;
    }

    // Read via GetValue + ToString rather than GetString: a non-text first
    // cell (e.g. a number) simply means "not a config sheet" and must not
    // surface as a cast failure.
    if (reader.GetValue(0)?.ToString() != "##")
    {
        return false;
    }

    for (int i = 1, n = reader.FieldCount; i < n; i++)
    {
        var attr = reader.GetValue(i)?.ToString();
        if (string.IsNullOrWhiteSpace(attr))
        {
            continue;
        }
        var ss = attr.Split(':', '=');
        if (ss.Length != 2)
        {
            throw new Exception($"单元薄 meta 定义出错. attribute:{attr}");
        }
        string key = ss[0].Trim().ToLowerInvariant();
        string value = ss[1].Trim().ToLowerInvariant();
        switch (key)
        {
            case "orientation":
            {
                IsOrientRow = DefUtil.ParseOrientation(value);
                break;
            }
            case "title_rows":
            {
                if (!int.TryParse(value, out var v))
                {
                    throw new Exception($"单元薄 meta 定义 title_rows:{value} 属性值只能为整数[{TITLE_MIN_ROW_NUM},{TITLE_MAX_ROW_NUM}]");
                }
                if (v < TITLE_MIN_ROW_NUM || v > TITLE_MAX_ROW_NUM)
                {
                    throw new Exception($"单元薄 title_rows 应该在 [{TITLE_MIN_ROW_NUM},{TITLE_MAX_ROW_NUM}] 范围内,默认是{TITLE_DEFAULT_ROW_NUM}");
                }
                TitleRows = v;
                break;
            }
            default:
            {
                throw new Exception($"非法单元薄 meta 属性定义 {attr}, 合法属性有: orientation=r|row|c|column,title_rows=<number>");
            }
        }
    }
    return true;
}
/// <summary>
/// Returns the trimmed text of the row's first cell, or null when the row
/// is empty or that cell has no value.
/// </summary>
private static string GetRowTag(List<Cell> row)
{
    return row.Count == 0 ? null : row[0].Value?.ToString().Trim();
}
// Recursively builds the sub titles of `parentTitle` from the header row rows[depth],
// restricted to columns [fromColumn, toColumn].
// Merged cells are handled first (each becomes a multi-column sub title), then every
// remaining non-blank cell becomes a single-column sub title. Recursion continues to
// the next header row while depth + 1 < maxDepth.
// Throws Exception when the same sub title name appears at two different columns.
private void InitSubTitles(Title parentTitle, List<List<Cell>> rows, CellRange[] mergeCells, int maxDepth, int depth, int fromColumn, int toColumn)
{
List<Cell> row = rows[depth];
//if (row.Count > fromColumn)
//{
// row = row.GetRange(fromColumn, Math.Min(row.Count, toColumn + 1) - fromColumn);
//}
// Pass 1: merged cells that start on this header row and lie inside the parent's span.
foreach (var mergeCell in mergeCells)
{
// NOTE(review): FromRow is compared with depth + 1, presumably because merge-cell
// rows count the meta row while `rows` does not — confirm against the reader.
if (mergeCell.FromRow == depth + 1 && mergeCell.FromColumn >= fromColumn && mergeCell.ToColumn <= toColumn)
{
string subTitleName = row[mergeCell.FromColumn].Value?.ToString().Trim();
if (!string.IsNullOrWhiteSpace(subTitleName))
{
var newTitle = new Title() { Name = subTitleName, FromIndex = mergeCell.FromColumn, ToIndex = mergeCell.ToColumn };
if (depth + 1 < maxDepth)
{
InitSubTitles(newTitle, rows, mergeCells, maxDepth, depth + 1, mergeCell.FromColumn, mergeCell.ToColumn);
}
parentTitle.AddSubTitle(newTitle);
}
}
}
// Pass 2: single-column titles for the remaining named cells of this row.
for (int i = fromColumn; i <= toColumn; i++)
{
if (i >= row.Count)
{
break;
}
var name = row[i].Value?.ToString()?.Trim();
if (string.IsNullOrWhiteSpace(name))
{
continue;
}
if (parentTitle.SubTitles.TryGetValue(name, out var oldTitle))
{
// Same name at a different starting column is a duplicate; same column means
// the title was already registered by the merged-cell pass, so skip it.
if (oldTitle.FromIndex != i)
{
throw new Exception($"sub title 列:{name} 重复");
}
else
{
continue;
}
}
var newTitle = new Title() { Name = name, FromIndex = i, ToIndex = i };
if (depth + 1 < maxDepth)
{
InitSubTitles(newTitle, rows, mergeCells, maxDepth, depth + 1, i, i);
}
parentTitle.AddSubTitle(newTitle);
}
}
const string ROOT_TITLE_NAME = "__<root>__";

/// <summary>
/// Reads every row after the meta row, transposes the sheet when it is
/// column-oriented, builds the header tree under the root title and finally
/// strips the header rows from the loaded rows.
/// </summary>
/// <param name="reader">Reader positioned just after the meta row.</param>
/// <param name="headerOnly">
/// When true only the header is needed: row-oriented sheets stop reading
/// early, and only the field-name rows are removed afterwards.
/// </param>
/// <exception cref="Exception">
/// The sheet has no field-name row, no valid column, or a duplicated column name.
/// </exception>
private void LoadRemainRows(IExcelDataReader reader, bool headerOnly)
{
    // TODO: optimise performance. Ideas:
    // 1. do not load columns that have no title
    // 2. skip blank rows early
    // 3. skip null or empty cells
    var rows = new List<List<Cell>>();
    int rowIndex = 0;
    while (reader.Read())
    {
        ++rowIndex; // Row 1 is the meta row; title and data rows start at row 2.
        // headerOnly optimisation for row-oriented sheets: read only the
        // leading title rows, not the data rows.
        if (headerOnly && this.IsOrientRow && rowIndex >= 10)
        {
            break;
        }
        int fieldCount = reader.FieldCount;
        var row = new List<Cell>(fieldCount);
        for (int i = 0; i < fieldCount; i++)
        {
            row.Add(new Cell(rowIndex, i, reader.GetValue(i)));
        }
        rows.Add(row);
    }

    // Fail with the intended message before any Max() runs on an empty
    // sequence (which would throw InvalidOperationException instead).
    if (rows.Count == 0)
    {
        throw new Exception($"没有定义字段名行");
    }

    if (IsOrientRow)
    {
        this._rowColumns = rows;
    }
    else
    {
        // Transpose rows and columns.
        int maxColumn = rows.Max(r => r.Count);
        this._rowColumns = new List<List<Cell>>(maxColumn);
        for (int i = 0; i < maxColumn; i++)
        {
            var row = new List<Cell>(rows.Count);
            for (int j = 0; j < rows.Count; j++)
            {
                row.Add(i < rows[j].Count ? rows[j][i] : new Cell(j + 1, i, null));
            }
            this._rowColumns.Add(row);
        }
    }

    if (this._rowColumns.Count < 1)
    {
        throw new Exception($"没有定义字段名行");
    }

    // The root spans the width of the rows actually stored. After a transpose
    // that width is the number of source rows, not the source column count,
    // so it must be measured on _rowColumns rather than on `rows`.
    _rootTitle = new Title() { Root = true, Name = ROOT_TITLE_NAME, FromIndex = 1, ToIndex = this._rowColumns.Max(r => r.Count) - 1 };

    int titleRowNum = 1;
    if (reader.MergeCells != null)
    {
        if (IsOrientRow)
        {
            // A vertical merge of the first column starting on the field-name
            // row determines how many rows the field-name header occupies.
            foreach (var mergeCell in reader.MergeCells)
            {
                if (mergeCell.FromRow == 1 && mergeCell.FromColumn == 0 && mergeCell.ToColumn == 0)
                {
                    titleRowNum = mergeCell.ToRow - mergeCell.FromRow + 1;
                }
            }
        }

        foreach (var mergeCell in reader.MergeCells)
        {
            if (IsOrientRow)
            {
                //if (mergeCell.FromRow <= 1 && mergeCell.ToRow >= 1)
                if (mergeCell.FromRow == 1)
                {
                    // Title row.
                    titleRowNum = Math.Max(titleRowNum, mergeCell.ToRow - mergeCell.FromRow + 1);
                    var titleName = _rowColumns[0][mergeCell.FromColumn].Value?.ToString()?.Trim();
                    if (string.IsNullOrWhiteSpace(titleName))
                    {
                        continue;
                    }
                    var newTitle = new Title() { Name = titleName, FromIndex = mergeCell.FromColumn, ToIndex = mergeCell.ToColumn };
                    if (titleRowNum > 1)
                    {
                        InitSubTitles(newTitle, rows, reader.MergeCells, titleRowNum, 1, mergeCell.FromColumn, mergeCell.ToColumn);
                    }
                    _rootTitle.AddSubTitle(newTitle);
                    //s_logger.Info("=== sheet:{sheet} title:{title}", Name, newTitle);
                }
            }
            else
            {
                if (mergeCell.FromColumn <= 0 && mergeCell.ToColumn >= 0)
                {
                    // Title row (a column of the source sheet, since it was transposed).
                    var titleName = _rowColumns[0][mergeCell.FromRow - 1].Value?.ToString()?.Trim();
                    if (string.IsNullOrWhiteSpace(titleName))
                    {
                        continue;
                    }
                    _rootTitle.AddSubTitle(new Title() { Name = titleName, FromIndex = mergeCell.FromRow - 1, ToIndex = mergeCell.ToRow - 1 });
                }
            }
        }
    }

    // TODO: known bug — multi-level headers that occupy a single column are not handled.
    // Merged columns were processed above; now process the non-merged columns.
    var titleRow = _rowColumns[0];
    for (int i = 0; i < titleRow.Count; i++)
    {
        var name = titleRow[i].Value?.ToString()?.Trim();
        if (string.IsNullOrWhiteSpace(name))
        {
            continue;
        }
        if (_rootTitle.SubTitles.TryGetValue(name, out var oldTitle))
        {
            // Same name at another column is a duplicate; at the same column
            // it was already registered by the merged-cell pass.
            if (oldTitle.FromIndex != i)
            {
                throw new Exception($"列:{name} 重复");
            }
            else
            {
                continue;
            }
        }
        _rootTitle.AddSubTitle(new Title() { Name = name, FromIndex = i, ToIndex = i });
    }

    if (_rootTitle.SubTitleList.Count == 0)
    {
        throw new Exception($"没有定义任何有效 列");
    }
    _rootTitle.SortSubTitles();

    if (headerOnly)
    {
        // Remove the field-name rows; keep everything from the attribute row on.
        this._rowColumns.RemoveRange(0, Math.Min(titleRowNum, this._rowColumns.Count));
    }
    else
    {
        // Remove all title rows (field names, attributes, titles, descriptions, ...),
        // i.e. every row that is not actual data.
        this._rowColumns.RemoveRange(0, Math.Min(TitleRows, this._rowColumns.Count));
        // Remove record rows tagged as ignored.
        this._rowColumns.RemoveAll(row => DataUtil.IsIgnoreTag(GetRowTag(row)));
    }
}
/// <summary>
/// Tells whether every cell of <paramref name="row"/> except the first is
/// null or whitespace-only text.
/// </summary>
/// <remarks>
/// The first column is used by designers to mark a row as commented out, so
/// whether it is blank is ignored. An empty row counts as blank; the previous
/// <c>GetRange(1, row.Count - 1)</c> threw on it and copied the row on every call.
/// </remarks>
private static bool IsBlankRow(List<Cell> row)
{
    for (int i = 1; i < row.Count; i++)
    {
        var v = row[i].Value;
        if (v != null && !(v is string s && string.IsNullOrWhiteSpace(s)))
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Tells whether the cells of <paramref name="row"/> in columns
/// [<paramref name="fromIndex"/>, <paramref name="toIndex"/>] are all null or
/// empty strings. Column 0 is never inspected and the span is clamped to the
/// row's length.
/// </summary>
private static bool IsBlankRow(List<Cell> row, int fromIndex, int toIndex)
{
    int first = Math.Max(1, fromIndex);
    int last = Math.Min(toIndex, row.Count - 1);
    for (int col = first; col <= last; col++)
    {
        object value = row[col].Value;
        bool empty = value == null || (value is string text && string.IsNullOrEmpty(text));
        if (!empty)
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Tells whether two rows hold equivalent values in columns
/// [<paramref name="fromIndex"/>, <paramref name="toIndex"/>].
/// </summary>
/// <remarks>
/// Column 0 is never compared and the span is clamped to the length of
/// <paramref name="row1"/>. A null cell is considered equal to a
/// whitespace-only string; two non-null cells are compared by their
/// <c>ToString()</c> text. Every column of the span is checked: the previous
/// code returned as soon as it met the first pair of non-null cells, so later
/// columns were never compared.
/// </remarks>
/// <returns>
/// false when <paramref name="row2"/> is too short to cover the compared
/// span or any column differs; otherwise true.
/// </returns>
private static bool IsSameRow(List<Cell> row1, List<Cell> row2, int fromIndex, int toIndex)
{
    int last = Math.Min(toIndex, row1.Count - 1);
    // row2 must contain index `last`, otherwise row2[i] below would be out of range.
    if (row2.Count <= last)
    {
        return false;
    }

    for (int i = Math.Max(1, fromIndex); i <= last; i++)
    {
        var v1 = row1[i].Value;
        var v2 = row2[i].Value;
        // Same object (or both null): nothing to compare.
        if (ReferenceEquals(v1, v2))
        {
            continue;
        }

        if (v1 == null)
        {
            if (!(v2 is string s && string.IsNullOrWhiteSpace(s)))
            {
                return false;
            }
        }
        else if (v2 == null)
        {
            if (!(v1 is string s && string.IsNullOrWhiteSpace(s)))
            {
                return false;
            }
        }
        else if (v1.ToString() != v2.ToString())
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Tells whether column <paramref name="column"/> is null or an empty string
/// in every row that is long enough to contain it.
/// </summary>
private static bool IsBlankColumn(List<List<Cell>> rows, int column)
{
    return rows
        .Where(line => column < line.Count)
        .Select(line => line[column].Value)
        .All(value => value == null || (value is string text && string.IsNullOrEmpty(text)));
}
/// <summary>
/// Lazily reads every record of the sheet as a bean of <paramref name="type"/>.
/// </summary>
/// <remarks>
/// Rows are first grouped into records; each record is then parsed, and the
/// tag in the first cell of its first row decides whether it is a test record.
/// </remarks>
public IEnumerable<Record> ReadMulti(TBean type)
{
    var namedRows = NamedRow.CreateMultiRowNamedRow(_rowColumns, _rootTitle, type);
    foreach (var namedRow in namedRows)
    {
        string firstRowTag = GetRowTag(namedRow.Rows[0]);
        bool isTest = DataUtil.IsTestTag(firstRowTag);
        var data = (DBean)ExcelNamedRowDataCreator.Ins.ReadExcel(namedRow, type);
        yield return new Record(data, RawUrl, isTest);
    }
}
}
}