【完善】完善TitleRow解析

【完善】添加RawSheetTableDefInfo 解析
main
walon 2021-10-15 11:39:13 +08:00
parent 4613169811
commit 1c22887e69
6 changed files with 282 additions and 164 deletions

View File

@ -2,6 +2,7 @@
using Luban.Job.Cfg.Datas;
using Luban.Job.Cfg.DataSources.Excel;
using Luban.Job.Cfg.Defs;
using Luban.Job.Cfg.RawDefs;
using Luban.Job.Cfg.TypeVisitors;
using Luban.Job.Cfg.Utils;
using Luban.Job.Common.Types;
@ -222,29 +223,13 @@ namespace Luban.Job.Cfg.DataCreators
}
/// <summary>
/// Builds a <c>DText</c> from a title row whose first two cells hold the text key and the text value.
/// </summary>
/// <param name="type">The text type being visited (not consulted by this implementation).</param>
/// <param name="sheet">The sheet the row belongs to (not consulted by this implementation).</param>
/// <param name="row">The title row; <c>row.Row[0]</c> is read as the key and <c>row.Row[1]</c> as the text.</param>
/// <returns>A new <c>DText</c> built from the parsed key and text.</returns>
/// <exception cref="Exception">Thrown when the row does not provide two cells.</exception>
public DType Accept(TText type, Sheet sheet, TitleRow row)
{
    // The block previously declared key/text twice: once for the older sep/split parsing and
    // once for the two-cell parsing. Only the two-cell form is kept so the method compiles.
    var cells = row.Row;
    if (cells == null || cells.Count < 2)
    {
        // Fail with the descriptive message instead of an index-out-of-range error.
        throw new Exception($"text 要求两个字段");
    }

    var key = ParseString(cells[0].Value);
    var text = ParseString(cells[1].Value);
    DataUtil.ValidateText(key, text);
    return new DText(key, text);
}
@ -287,8 +272,6 @@ namespace Luban.Job.Cfg.DataCreators
{
var s = row.AsStream(sep);
return type.Apply(ExcelStreamDataCreator.Ins, s);
}
else if (row.Rows != null)
{
@ -348,6 +331,8 @@ namespace Luban.Job.Cfg.DataCreators
}
}
const string SimpleContainerSep = ",;";
public string GetSep(TType type)
{
if (type.Tags.TryGetValue("sep", out var s) && !string.IsNullOrWhiteSpace(s))
@ -356,9 +341,10 @@ namespace Luban.Job.Cfg.DataCreators
}
switch (type)
{
case TArray ta: return ta.ElementType.Apply(IsNotSepTypeVisitor.Ins) ? "," : "";
case TList ta: return ta.ElementType.Apply(IsNotSepTypeVisitor.Ins) ? "," : "";
case TSet ta: return ta.ElementType.Apply(IsNotSepTypeVisitor.Ins) ? "," : "";
case TBean tb: return (tb.Bean as DefBean).Sep;
case TArray ta: return ta.ElementType.Apply(IsNotSepTypeVisitor.Ins) ? SimpleContainerSep : "";
case TList ta: return ta.ElementType.Apply(IsNotSepTypeVisitor.Ins) ? SimpleContainerSep : "";
case TSet ta: return ta.ElementType.Apply(IsNotSepTypeVisitor.Ins) ? SimpleContainerSep : "";
default: return "";
}
}

View File

@ -8,6 +8,10 @@ namespace Luban.Job.Cfg.DataSources.Excel
{
class FieldInfo
{
public string Name { get; init; }
public Dictionary<string, string> Tags { get; init; }
public string Type { get; init; }
public string BriefDesc { get; init; }

View File

@ -20,6 +20,8 @@ namespace Luban.Job.Cfg.DataSources.Excel
public string RawUrl { get; }
public List<TitleRow> Rows { get; } = new();
public Sheet(string rawUrl, string name)
{
this.RawUrl = rawUrl;
@ -28,12 +30,185 @@ namespace Luban.Job.Cfg.DataSources.Excel
/// <summary>
/// Converts the data cells of a raw sheet into <c>TitleRow</c> records and appends them to <c>Rows</c>.
/// </summary>
/// <param name="rawSheet">Raw sheet providing the parsed title tree and the data cells.</param>
public void Load(RawSheet rawSheet)
{
    bool hasMultiRowColumn = rawSheet.Title.SubTitleList.Any(t => t.SelfMultiRows);
    var dataRows = rawSheet.Cells;
    Title rootTitle = rawSheet.Title;

    if (hasMultiRowColumn)
    {
        // At least one top-level column spans several sheet rows: group rows into records first.
        foreach (var recordRows in SplitRows(rootTitle, dataRows))
        {
            Rows.Add(ParseMultiLineTitleRow(rootTitle, recordRows));
        }
        return;
    }

    // Plain layout: every non-blank sheet row is exactly one record.
    foreach (var dataRow in dataRows)
    {
        if (!IsBlankRow(dataRow, rootTitle.FromIndex, rootTitle.ToIndex))
        {
            Rows.Add(ParseOneLineTitleRow(rootTitle, dataRow));
        }
    }
}
/// <summary>
/// Builds the <c>TitleRow</c> for <paramref name="title"/> from a single sheet row.
/// </summary>
/// <param name="title">Title node to parse; its sub titles are parsed recursively against the same row.</param>
/// <param name="row">Cells of the sheet row.</param>
/// <returns>
/// A row wrapping the cells directly when the title has no sub titles, otherwise a row
/// holding one child <c>TitleRow</c> per sub title, keyed by the sub title's name.
/// </returns>
private TitleRow ParseOneLineTitleRow(Title title, List<Cell> row)
{
    var children = title.SubTitleList;
    if (children.Count > 0)
    {
        // Duplicate sub title names surface as an ArgumentException from the dictionary insert.
        Dictionary<string, TitleRow> childRows = children.ToDictionary(
            child => child.Name,
            child => ParseOneLineTitleRow(child, row));
        return new TitleRow(title, childRows);
    }

    return new TitleRow(title, row);
}
/// <summary>
/// Groups consecutive sheet rows into records for a title that contains multi-row columns.
/// </summary>
/// <remarks>
/// Rows that are blank across the title's column range are skipped. A non-blank row is
/// appended to the current record when every sub title that is not itself multi-row is blank
/// on that row; otherwise the current record is emitted and the row opens the next record.
/// </remarks>
/// <param name="title">Title whose column range and sub titles drive the grouping.</param>
/// <param name="rows">Sheet rows to group, in sheet order.</param>
/// <returns>A lazily evaluated sequence of row groups, each with at least one row.</returns>
private IEnumerable<List<List<Cell>>> SplitRows(Title title, List<List<Cell>> rows)
{
    List<List<Cell>> oneRecordRows = null;
    foreach (var row in rows)
    {
        if (IsBlankRow(row, title.FromIndex, title.ToIndex))
        {
            continue;
        }

        // Multi-row sub titles may carry data on continuation rows, so they are exempt from
        // the blank check. The earlier predicate ("not multi-row AND blank" for every sub
        // title) could never hold once any sub title was multi-row.
        bool continuesRecord = oneRecordRows != null
            && title.SubTitleList.All(t => t.SelfMultiRows || IsBlankRow(row, t.FromIndex, t.ToIndex));
        if (continuesRecord)
        {
            oneRecordRows.Add(row);
            continue;
        }

        if (oneRecordRows != null)
        {
            yield return oneRecordRows;
        }

        // The row that ends the previous record is the first row of the next one; it must
        // not be discarded.
        oneRecordRows = new List<List<Cell>>() { row };
    }

    if (oneRecordRows != null)
    {
        yield return oneRecordRows;
    }
}
/// <summary>
/// Builds the <c>TitleRow</c> for <paramref name="title"/> from the sheet rows of one record.
/// </summary>
/// <param name="title">Title node to parse.</param>
/// <param name="rows">Rows belonging to the record; expected to contain at least one row.</param>
/// <returns>
/// For a leaf title: a row over all <paramref name="rows"/> when the title is multi-row, otherwise over the first row.
/// For a title with sub titles: a list of element rows when the title is multi-row, otherwise a single row of named fields.
/// </returns>
private TitleRow ParseMultiLineTitleRow(Title title, List<List<Cell>> rows)
{
    if (title.SubTitleList.Count == 0)
    {
        return title.SelfMultiRows
            ? new TitleRow(title, rows)
            : new TitleRow(title, rows[0]);
    }

    if (!title.SelfMultiRows)
    {
        return new TitleRow(title, ParseSubTitleFields(title, rows));
    }

    // A multi-row title with sub titles is a list of elements: split the record's rows into
    // one group per element, then parse each group into named fields.
    var eles = new List<TitleRow>();
    foreach (var eleRows in SplitRows(title, rows))
    {
        eles.Add(new TitleRow(title, ParseSubTitleFields(title, eleRows)));
    }
    return new TitleRow(title, eles);
}

/// <summary>
/// Parses every sub title of <paramref name="title"/> against <paramref name="rows"/> and returns the results keyed by sub title name.
/// </summary>
private Dictionary<string, TitleRow> ParseSubTitleFields(Title title, List<List<Cell>> rows)
{
    var fields = new Dictionary<string, TitleRow>();
    foreach (var subTitle in title.SubTitleList)
    {
        // Recurse with the sub title itself. Passing the parent title here re-enters this
        // method with identical arguments and never terminates.
        TitleRow field = subTitle.SelfMultiRows
            ? ParseMultiLineTitleRow(subTitle, rows)
            : ParseOneLineTitleRow(subTitle, rows[0]);
        fields.Add(subTitle.Name, field);
    }
    return fields;
}
/// <summary>
/// Returns the records accumulated in <c>Rows</c>.
/// </summary>
/// <returns>The <c>Rows</c> list, exposed as a sequence.</returns>
public IEnumerable<TitleRow> GetRows()
{
    // A method body cannot combine "yield return" with "return <value>"; the placeholder
    // "yield return null" is dropped so the loaded rows are what callers receive.
    return Rows;
}
/// <summary>
/// Tells whether a row has no content within the given column range.
/// </summary>
/// <param name="row">Cells of the row.</param>
/// <param name="fromIndex">First column of the range; column 0 is never inspected.</param>
/// <param name="toIndex">Last column of the range (inclusive); clamped to the row's last cell.</param>
/// <returns><c>true</c> when every inspected cell value is null or an empty string.</returns>
private static bool IsBlankRow(List<Cell> row, int fromIndex, int toIndex)
{
    int firstColumn = Math.Max(1, fromIndex);
    int lastColumn = Math.Min(toIndex, row.Count - 1);

    for (int column = firstColumn; column <= lastColumn; column++)
    {
        var cellValue = row[column].Value;
        if (cellValue == null)
        {
            continue;
        }
        if (cellValue is string text && string.IsNullOrEmpty(text))
        {
            continue;
        }
        // Any other value, including a whitespace-only string, counts as content.
        return false;
    }

    return true;
}
/// <summary>
/// Compares two rows cell by cell within a column range.
/// </summary>
/// <remarks>
/// A null value and a null/whitespace-only string are treated as equal; any other pair is
/// compared through <c>ToString()</c>.
/// </remarks>
/// <param name="row1">First row; its length bounds the compared range.</param>
/// <param name="row2">Second row.</param>
/// <param name="fromIndex">First column of the range; column 0 is never compared.</param>
/// <param name="toIndex">Last column of the range (inclusive).</param>
/// <returns><c>true</c> when every compared column matches; <c>false</c> otherwise.</returns>
private static bool IsSameRow(List<Cell> row1, List<Cell> row2, int fromIndex, int toIndex)
{
    if (row2.Count < toIndex - 1)
    {
        return false;
    }
    for (int i = Math.Max(1, fromIndex), n = Math.Min(toIndex, row1.Count - 1); i <= n; i++)
    {
        var v1 = row1[i].Value;
        // The length guard above still allows row2 to be shorter than the compared range;
        // treat cells missing from row2 as empty instead of indexing past its end.
        var v2 = i < row2.Count ? row2[i].Value : null;
        if (v1 != v2)
        {
            if (v1 == null)
            {
                if (!(v2 is string s && string.IsNullOrWhiteSpace(s)))
                {
                    return false;
                }
            }
            else if (v2 == null)
            {
                if (!(v1 is string s && string.IsNullOrWhiteSpace(s)))
                {
                    return false;
                }
            }
            else if (v1.ToString() != v2.ToString())
            {
                // Only a mismatch ends the comparison; a match must fall through so the
                // remaining columns are still checked.
                return false;
            }
        }
    }
    return true;
}
}
}

View File

@ -17,24 +17,6 @@ namespace Luban.Job.Cfg.DataSources.Excel
private const int TITLE_MAX_ROW_NUM = 10;
private const int TITLE_DEFAULT_ROW_NUM = 3;
//private bool IsOrientRow { get; set; } = true; // 以行为数据读取方向
//public int HeaderRowCount { get; private set; } = TITLE_DEFAULT_ROW_NUM; // 默认有三行是标题行. 第一行是字段名,第二行是中文描述,第三行是注释
//public int AttrRowCount { get; private set; }
//public string RawUrl { get; }
//public string Name { get; }
//private List<List<Cell>> _rowColumns;
//private Title _rootTitle;
//public List<Title> RootFields => _rootTitle.SubTitleList;
//public List<List<Cell>> RowColumns => _rowColumns;
private static System.Text.Encoding DetectCsvEncoding(Stream fs)
{
Ude.CharsetDetector cdet = new Ude.CharsetDetector();
@ -90,7 +72,7 @@ namespace Luban.Job.Cfg.DataSources.Excel
return null;
}
var cells = ParseRawSheetContent(reader, orientRow);
var title = ParseTitle(cells, reader.MergeCells, orientRow);
var title = ParseTitle(cells, reader.MergeCells, orientRow, out _);
cells.RemoveRange(0, Math.Min(titleRowNum, cells.Count));
return new RawSheet() { Title = title, Cells = cells };
}
@ -124,15 +106,15 @@ namespace Luban.Job.Cfg.DataSources.Excel
return 1;
}
public static Title ParseTitle(List<List<Cell>> cells, CellRange[] mergeCells, bool orientRow)
public static Title ParseTitle(List<List<Cell>> cells, CellRange[] mergeCells, bool orientRow, out int titleRowNum)
{
var rootTitle = new Title() { Root = true, Name = "__root__", FromIndex = 0, ToIndex = cells.Select(r => r.Count).Max() - 1 };
int titleRowNum = GetTitleRowNum(mergeCells, orientRow);
titleRowNum = GetTitleRowNum(mergeCells, orientRow);
ParseSubTitles(rootTitle, cells, mergeCells, orientRow, 1, titleRowNum);
rootTitle.SortSubTitles();
rootTitle.Init();
if (rootTitle.SubTitleList.Count == 0)
{
@ -146,12 +128,12 @@ namespace Luban.Job.Cfg.DataSources.Excel
return string.IsNullOrEmpty(title) || title.StartsWith('#');
}
private static (string Name, string Sep) ParseNameAndMetaAttrs(string nameAndAttrs)
private static (string Name, Dictionary<string, string> Tags) ParseNameAndMetaAttrs(string nameAndAttrs)
{
var attrs = nameAndAttrs.Split('&');
string titleName = attrs[0];
string sep = "";
var tags = new Dictionary<string, string>();
foreach (var attrPair in attrs.Skip(1))
{
var pairs = attrPair.Split('=');
@ -159,20 +141,9 @@ namespace Luban.Job.Cfg.DataSources.Excel
{
throw new Exception($"invalid title: {nameAndAttrs}");
}
switch (pairs[0])
{
case "sep":
{
sep = pairs[1];
break;
tags.Add(pairs[0], pairs[1]);
}
default:
{
throw new Exception($"invalid title: {nameAndAttrs}");
}
}
}
return (titleName, sep);
return (titleName, tags);
}
private static void ParseSubTitles(Title title, List<List<Cell>> cells, CellRange[] mergeCells, bool orientRow, int curDepth, int maxDepth)
@ -192,8 +163,8 @@ namespace Luban.Job.Cfg.DataSources.Excel
{
continue;
}
var (titleName, sep) = ParseNameAndMetaAttrs(nameAndAttrs);
subTitle = new Title() { Name = titleName, Sep = sep, FromIndex = mergeCell.FromColumn, ToIndex = mergeCell.ToColumn };
var (titleName, tags) = ParseNameAndMetaAttrs(nameAndAttrs);
subTitle = new Title() { Name = titleName, Tags = tags, FromIndex = mergeCell.FromColumn, ToIndex = mergeCell.ToColumn };
//s_logger.Info("=== sheet:{sheet} title:{title}", Name, newTitle);
}
}
@ -207,8 +178,8 @@ namespace Luban.Job.Cfg.DataSources.Excel
{
continue;
}
var (titleName, sep) = ParseNameAndMetaAttrs(nameAndAttrs);
subTitle = new Title() { Name = titleName, Sep = sep, FromIndex = mergeCell.FromRow - 1, ToIndex = mergeCell.ToRow - 1 };
var (titleName, tags) = ParseNameAndMetaAttrs(nameAndAttrs);
subTitle = new Title() { Name = titleName, Tags = tags, FromIndex = mergeCell.FromRow - 1, ToIndex = mergeCell.ToRow - 1 };
}
}
if (subTitle == null)
@ -231,7 +202,7 @@ namespace Luban.Job.Cfg.DataSources.Excel
{
continue;
}
var (titleName, sep) = ParseNameAndMetaAttrs(nameAndAttrs);
var (titleName, tags) = ParseNameAndMetaAttrs(nameAndAttrs);
if (title.SubTitles.TryGetValue(titleName, out var oldTitle))
{
@ -244,40 +215,10 @@ namespace Luban.Job.Cfg.DataSources.Excel
continue;
}
}
title.AddSubTitle(new Title() { Name = titleName, Sep = sep, FromIndex = i, ToIndex = i });
title.AddSubTitle(new Title() { Name = titleName, Tags = tags, FromIndex = i, ToIndex = i });
}
}
/// <summary>
/// Placeholder for loading a sheet's table definition info: logs the request and returns null.
/// </summary>
/// <param name="rawUrl">Source file path or URL; logged, and its extension is extracted.</param>
/// <param name="sheetName">Sheet name; only used in the trace message here.</param>
/// <param name="stream">Input stream; not read by this body.</param>
/// <returns>Always <c>null</c>.</returns>
public static RawSheetTableDefInfo LoadSheetTableDefInfo(string rawUrl, string sheetName, Stream stream)
{
s_logger.Trace("{filename} {sheet}", rawUrl, sheetName);
// NOTE(review): ext is computed but never used by the active code below.
string ext = Path.GetExtension(rawUrl);
// Disabled draft of the reader loop, kept as-is:
//using (var reader = ext != ".csv" ? ExcelReaderFactory.CreateReader(stream) : ExcelReaderFactory.CreateCsvReader(stream, new ExcelReaderConfiguration() { FallbackEncoding = DetectCsvEncoding(stream) }))
//{
// do
// {
// if (sheetName == null || reader.Name == sheetName)
// {
// try
// {
// var sheet = ReadSheet(rawUrl, reader);
// if (sheet != null)
// {
// _sheets.Add(sheet);
// }
// }
// catch (Exception e)
// {
// throw new Exception($"excel:{rawUrl} sheet:{reader.Name} 读取失败.", e);
// }
// }
// } while (reader.NextResult());
//}
return null;
}
public static bool TryParseMeta(IExcelDataReader reader, out bool orientRow, out int titleRows, out string tableName)
{
orientRow = true;
@ -342,7 +283,7 @@ namespace Luban.Job.Cfg.DataSources.Excel
return true;
}
private static List<List<Cell>> ParseRawSheetContent(IExcelDataReader reader, bool orientRow)
private static List<List<Cell>> ParseRawSheetContent(IExcelDataReader reader, bool orientRow, int? maxParseRow = null)
{
// TODO 优化性能
// 几个思路
@ -360,6 +301,10 @@ namespace Luban.Job.Cfg.DataSources.Excel
row.Add(new Cell(rowIndex, i, reader.GetValue(i)));
}
originRows.Add(row);
if (orientRow && maxParseRow != null && originRows.Count > maxParseRow)
{
break;
}
}
List<List<Cell>> finalRows;
@ -386,81 +331,70 @@ namespace Luban.Job.Cfg.DataSources.Excel
return finalRows;
}
private static bool IsBlankRow(List<Cell> row)
public static RawSheetTableDefInfo LoadSheetTableDefInfo(string rawUrl, string sheetName, Stream stream)
{
// 第一列被策划用于表示是否注释掉此行
// 忽略此列是否空白
return row.GetRange(1, row.Count - 1).All(c => c.Value == null || (c.Value is string s && string.IsNullOrWhiteSpace(s)));
s_logger.Trace("{filename} {sheet}", rawUrl, sheetName);
string ext = Path.GetExtension(rawUrl);
using (var reader = ext != ".csv" ? ExcelReaderFactory.CreateReader(stream) : ExcelReaderFactory.CreateCsvReader(stream, new ExcelReaderConfiguration() { FallbackEncoding = DetectCsvEncoding(stream) }))
{
do
{
if (sheetName == null || reader.Name == sheetName)
{
try
{
var tableDefInfo = ParseSheetTableDefInfo(rawUrl, reader);
if (tableDefInfo != null)
{
return tableDefInfo;
}
}
catch (Exception e)
{
throw new Exception($"excel:{rawUrl} sheet:{reader.Name} 读取失败.", e);
}
private static bool IsBlankRow(List<Cell> row, int fromIndex, int toIndex)
{
for (int i = Math.Max(1, fromIndex), n = Math.Min(toIndex, row.Count - 1); i <= n; i++)
{
var v = row[i].Value;
if (v != null && !(v is string s && string.IsNullOrEmpty(s)))
{
return false;
}
} while (reader.NextResult());
}
return true;
throw new Exception($"{rawUrl} 没有找到有效的表定义");
}
private static bool IsSameRow(List<Cell> row1, List<Cell> row2, int fromIndex, int toIndex)
private static RawSheetTableDefInfo ParseSheetTableDefInfo(string rawUrl, IExcelDataReader reader)
{
if (row2.Count < toIndex - 1)
bool orientRow;
int headerRowNum;
if (!TryParseMeta(reader, out orientRow, out headerRowNum, out var _))
{
return false;
return null;
}
for (int i = Math.Max(1, fromIndex), n = Math.Min(toIndex, row1.Count - 1); i <= n; i++)
var cells = ParseRawSheetContent(reader, orientRow, headerRowNum);
var title = ParseTitle(cells, reader.MergeCells, orientRow, out int titleRowNum);
if (cells.Count <= titleRowNum)
{
var v1 = row1[i].Value;
var v2 = row2[i].Value;
if (v1 != v2)
throw new Exception($"缺失type行");
}
List<Cell> typeRow = cells[titleRowNum];
List<Cell> briefDescRow = cells.Count > titleRowNum + 1 ? cells[titleRowNum + 1] : null;
List<Cell> destailDescRow = cells.Count > titleRowNum + 2 ? cells[titleRowNum + 2] : briefDescRow;
var fields = new Dictionary<string, FieldInfo>();
foreach (var subTitle in title.SubTitleList)
{
if (v1 == null)
fields.Add(subTitle.Name, new FieldInfo()
{
if (!(v2 is string s && string.IsNullOrWhiteSpace(s)))
{
return false;
}
}
else if (v2 == null)
{
if (!(v1 is string s && string.IsNullOrWhiteSpace(s)))
{
return false;
}
}
else
{
return v1.ToString() == v2.ToString();
}
}
}
return true;
Name = subTitle.Name,
Tags = title.Tags,
Type = typeRow != null ? typeRow[subTitle.FromIndex].Value?.ToString() : "",
BriefDesc = briefDescRow != null ? briefDescRow[subTitle.FromIndex].Value?.ToString() : "",
DetailDesc = destailDescRow != null ? destailDescRow[subTitle.FromIndex].Value?.ToString() : "",
});
}
private static bool IsBlankColumn(List<List<Cell>> rows, int column)
{
foreach (List<Cell> row in rows)
{
if (column >= row.Count)
{
continue;
return new RawSheetTableDefInfo() { FieldInfos = fields };
}
var v = row[column].Value;
if (v != null && !(v is string s && string.IsNullOrEmpty(s)))
{
return false;
}
}
return true;
}
}
}

View File

@ -1,6 +1,7 @@
using Bright.Collections;
using System;
using System.Collections.Generic;
using System.Linq;
namespace Luban.Job.Cfg.DataSources.Excel
{
@ -14,7 +15,7 @@ namespace Luban.Job.Cfg.DataSources.Excel
public string Name { get; set; }
public string Sep { get; set; }
public Dictionary<string, string> Tags { get; set; }
public Dictionary<string, Title> SubTitles { get; set; } = new Dictionary<string, Title>();
@ -22,6 +23,12 @@ namespace Luban.Job.Cfg.DataSources.Excel
public bool HasSubTitle => SubTitleList.Count > 0;
public string Sep { get; private set; }
public bool SelfMultiRows { get; private set; }
public bool HierarchyMultiRows { get; private set; }
public void AddSubTitle(Title title)
{
if (!SubTitles.TryAdd(title.Name, title))
@ -34,7 +41,7 @@ namespace Luban.Job.Cfg.DataSources.Excel
// 由于先处理merge再处理只占一列的标题头.
// sub titles 未必是有序的。对于大多数数据并无影响
// 但对于 list类型的多级标题头有可能导致element 数据次序乱了
public void SortSubTitles()
private void SortSubTitles()
{
SubTitleList.Sort((t1, t2) => t1.FromIndex - t2.FromIndex);
foreach (var t in SubTitleList)
@ -43,6 +50,21 @@ namespace Luban.Job.Cfg.DataSources.Excel
}
}
/// <summary>
/// Finalizes this title after its sub titles have been added: sorts the sub titles, derives
/// <c>Sep</c> and <c>SelfMultiRows</c> from <c>Tags</c>, initializes every sub title, and
/// computes <c>HierarchyMultiRows</c>.
/// </summary>
public void Init()
{
    SortSubTitles();

    // Titles created without tags (the synthetic root title is built that way) would
    // otherwise fail on the lookups below.
    Tags ??= new Dictionary<string, string>();

    // "sep" only counts when it has a non-blank value.
    Sep = Tags.TryGetValue("sep", out var v) && !string.IsNullOrWhiteSpace(v) ? v : null;
    // "multi_rows" is enabled by the literal values "1" or "true".
    SelfMultiRows = Tags.TryGetValue("multi_rows", out var v2) && (v2 == "1" || v2 == "true");

    foreach (var sub in SubTitleList)
    {
        sub.Init();
    }

    // Sub titles are initialized first so their HierarchyMultiRows is already final here.
    HierarchyMultiRows = SelfMultiRows || SubTitleList.Any(t => t.HierarchyMultiRows);
}
public override string ToString()
{
return $"name:{Name} [{FromIndex}, {ToIndex}] sub titles:[{string.Join(",\\n", SubTitleList)}]";

View File

@ -161,9 +161,6 @@ namespace Luban.Job.Cfg.Defs
_cfgServices.Add(new Service() { Name = name, Manager = manager, Groups = groups, Refs = refs });
}
private readonly Dictionary<string, Table> _name2CfgTable = new Dictionary<string, Table>();
private static List<string> CreateGroups(string s)
{
return s.Split(',', ';').Select(x => x.Trim()).Where(x => !string.IsNullOrWhiteSpace(x)).ToList();