# python学習 (Python learning exercises)


import lxml.etree as etree
# 追加・更新関数

class PathManipulator:
    """Quote-aware string-splitting helpers for XPath-like expressions.

    All methods are static.  Separators that appear inside quoted
    literals are never treated as split points.
    """

    @staticmethod
    def custom_split(xpath, sep='/'):
        """Split *xpath* on any character contained in *sep*.

        Separators inside single- or double-quoted literals are ignored.
        Empty segments are dropped from the result.
        """
        result = []
        in_quotes = False
        start = 0

        for i, char in enumerate(xpath):
            if char in ('"', "'"):
                # Entering or leaving a quoted literal.
                in_quotes = not in_quotes
            elif char in sep and not in_quotes:
                result.append(xpath[start:i])
                start = i + 1

        result.append(xpath[start:])
        return [x for x in result if x]  # drop empty segments

    @staticmethod
    def strncmp(s1, s2, n):
        """C-style strncmp: compare at most the first *n* characters.

        Returns 0 when equal up to *n* characters, a negative value when
        s1 sorts before s2 and a positive value otherwise.
        """
        min_len = min(len(s1), len(s2), n)

        for i in range(min_len):
            if s1[i] != s2[i]:
                return ord(s1[i]) - ord(s2[i])

        if min_len < n:
            # One string ran out before n characters were compared:
            # the shorter string sorts first (0 when lengths match).
            return len(s1) - len(s2)
        return 0

    @staticmethod
    def split_except_literal(input_string, separator):
        """Split *input_string* on the multi-character *separator*,
        ignoring occurrences inside quoted literals.

        Fix: quote state now toggles on double quotes as well as single
        quotes, matching custom_split's quote handling (previously a
        separator inside a double-quoted literal caused a bogus split).
        """
        result = []
        temp = ""
        skip_count = 0
        separator_len = len(separator)
        literal_started = False
        for i, char in enumerate(input_string):
            candidate = PathManipulator.extract_string(
                input_string, i, separator_len)
            if skip_count > 0:
                # Still consuming the remainder of a matched separator.
                skip_count -= 1
                continue
            if char in ('"', "'"):
                literal_started = not literal_started
                temp += char
            elif (not literal_started and
                  PathManipulator.strncmp(candidate, separator, separator_len) == 0):
                skip_count = separator_len - 1
                result.append(temp)
                temp = ""
            else:
                temp += char
        if temp:
            result.append(temp)
        return result

    @staticmethod
    def extract_string(string, start_index, length):
        """Return the substring of *string* starting at *start_index*,
        at most *length* characters long (shorter near the end)."""
        return string[start_index:start_index + length]


# 追加・更新関数

class XmlManipulator:
    """Add/update/delete helper for an XML document loaded via lxml."""

    def __init__(self, filepath):
        self.filepath = filepath
        # remove_blank_text lets pretty_print re-indent cleanly on save.
        self.parser = etree.XMLParser(remove_blank_text=True)
        self.tree = etree.parse(self.filepath, self.parser)
        self.root = self.tree.getroot()

    def upsert(self, xpath):
        """Walk *xpath* segment by segment, creating any element (with
        its predicate attributes) that does not yet exist, and return
        the final element.

        Existing elements are reused, so repeated calls with the same
        path return the same element.

        Fixes: removed leftover debug prints; replaced the
        ``len(...) is not 0`` identity comparison (which only works by
        accident of small-int caching) with a truth test.
        """
        parts = PathManipulator.custom_split(xpath)
        current_element = self.root

        for part in parts:
            if part == ".":
                continue

            # Split the segment into the tag name and the bracketed
            # attribute predicate (kept together as one string).
            tag_name, *attribute_parts = PathManipulator.custom_split(part, '[]')
            tag_name = tag_name.strip()
            predicate_body = " ".join(attribute_parts).strip()
            predicate = '[' + predicate_body + ']' if predicate_body else ""

            query = f"./{tag_name}{predicate}"
            elements = current_element.xpath(query)

            if not elements:
                # No match: create the element and set its attributes.
                new_element = etree.SubElement(current_element, tag_name)
                for attribute in PathManipulator.split_except_literal(predicate_body, 'and'):
                    key, value = attribute.strip().split('=')
                    key = key.strip().strip("@")
                    new_element.set(key, value.strip("]['\""))
                current_element = new_element
            else:
                # Match found: descend into the first matching element.
                print("Found: " + str(len(elements)) +
                      " elements for the given XPath = " + query)
                current_element = elements[0]

        return current_element

    # Deletion

    def delete(self, xpath):
        """Delete the single element matched by *xpath*.

        Prints a warning (and deletes nothing) when zero or more than
        one element matches.
        """
        elements = self.root.xpath(xpath)
        if len(elements) > 1:
            print("Warning: Multiple elements found for the given XPath = " + xpath)
        elif len(elements) == 1:
            parent = elements[0].getparent()
            parent.remove(elements[0])
        else:
            print("Warning: No elements found for the given XPath = " + xpath)

    def save(self, filepath):
        """Write the document to *filepath*, stripping namespace
        prefixes and pretty-printing with an XML declaration."""
        # Reduce every tag to its local name (drops namespaces).
        for elem in self.root.iter():
            elem.tag = etree.QName(elem).localname
        tree = self.root.getroottree()
        tree.write(filepath, pretty_print=True,
                   encoding='utf-8', xml_declaration=True)


# テスト用のコード
if __name__ == "__main__":
    xml_mod =XmlManipulator('input.xml')
    # 追加・更新のテスト
    # ない場合は新規作成
    xml_mod.upsert(
        "./base[@class='hogeC' and @distName='/aaa/bbb-99/fff-2/ddd-1/']")
    # ない場合は新規作成
    xml_mod.upsert(
        "./base[@class='hogeC' and @distName='/aaa/bbb-99/fff-2/ddd-4/']")
    # ない場合は新規作成
    xml_mod.upsert(
        "./base[@class='hogeC' and @distName='/aaa/eee-99/fff-2/ddd-4/']")
    element = xml_mod.upsert(
        "./base[@class='hogeA' and @distName='/aaa/bbb-99/ccc-5/ddd-1/']/list[@name='hogeList']/p")
    element.text = "hoge1"
    element = xml_mod.upsert(
        "./base[@class='hogeA' and @distName='/aaa/bbb-99/ccc-5/ddd-1/']/list[@name='hogeList']/p")
    element.text = "hoge2"
    element = xml_mod.upsert(
        "./base[@class='hogeA' and @distName='/aaa/bbb-99/ccc-5/ddd-1/']/list[@name='hogeList']/p")
    element.text = "hoge3"
    element = xml_mod.upsert(
        "./base[@class='hogeA' and @distName='/aaa/bbb-99/ccc-5/ddd-1/']/list[@name='hogeList']/p")
    element.text = "hoge4"
    element = xml_mod.upsert(
        "./base[@class='hogeA' and @distName='/aaa/bbb-99/ccc-5/ddd-1/']/list[@name='hogeList']/p")
    element.text = "hoge5"
    xml_mod.upsert("./base[@class='hogeA']")  # ある場合は取得
    # 削除のテスト
    xml_mod.delete("./base[@class='hogeC']")  # 削除する場合
    xml_mod.delete("./base[@class='hogeD']")  # 存在しない場合

    # 保存のテスト
    xml_mod.save( "output.xml")

import csv
import os


class CSVFileManager:
    """Read/modify/write a CSV file whose data rows are keyed by one or
    more primary-key columns.

    The title (header) row sits at ``title_row_index``; any lines above
    it are preserved untouched.
    """

    def __init__(self, file_path, title_row_index=0, primary_keys=None):
        self.file_path = file_path
        self.title_row_index = title_row_index
        self.primary_keys = primary_keys or []
        self.data = self._read_csv()

    def _read_csv(self):
        """Load the whole file as a list of rows (lists of strings)."""
        with open(self.file_path, 'r', newline='') as file:
            return list(csv.reader(file))

    def save(self, file_path=None):
        """Write the current data back out, to *file_path* if given,
        otherwise to the original path."""
        file_path = file_path or self.file_path
        with open(file_path, 'w', newline='') as file:
            csv.writer(file).writerows(self.data)

    def _key_indices(self):
        """Column indices of the primary-key columns in the title row.

        Raises ValueError when no primary keys were configured.
        """
        if not self.primary_keys:
            raise ValueError("Primary keys not specified.")
        title_row = self.data[self.title_row_index]
        return [title_row.index(key) for key in self.primary_keys]

    def upsert(self, new_row):
        """Replace the data row whose primary-key values match
        *new_row*, or append *new_row* when no such row exists.

        Bug fix: the previous implementation raised
        "Duplicate primary keys found." whenever a matching row
        existed, which made the update branch unreachable — the method
        could only ever append or raise, never update.
        """
        key_indices = self._key_indices()
        for idx, row in enumerate(self.data[self.title_row_index + 1:],
                                  start=self.title_row_index + 1):
            if all(row[k] == new_row[k] for k in key_indices):
                self.data[idx] = new_row
                break
        else:
            # No existing row matched: append as a new row.
            self.data.append(new_row)

    def delete(self, key_values):
        """Delete the single data row whose primary-key values equal
        *key_values* (given in primary_keys order).

        Prints a warning when nothing matches; raises ValueError when
        more than one row matches.
        """
        key_indices = self._key_indices()

        matching_rows = [
            idx
            for idx, row in enumerate(self.data[self.title_row_index + 1:],
                                      start=self.title_row_index + 1)
            if all(row[k] == value for k, value in zip(key_indices, key_values))
        ]

        if not matching_rows:
            print("Warning: No rows found with specified primary keys.")
            return

        if len(matching_rows) > 1:
            raise ValueError("Multiple rows found with specified primary keys.")

        del self.data[matching_rows[0]]


# Example usage:
# Module-level demo script: requires "example.csv" to exist next to
# this file, and rewrites it in place at the end.
file_path = "example.csv"
primary_keys = ["ID"]

# Initialize CSVFileManager instance
# NOTE(review): header row is at index 3, so the first three lines of
# example.csv are treated as a preamble — confirm against the data file.
manager = CSVFileManager(file_path, title_row_index=3, primary_keys=primary_keys)

# Upsert a new row
new_row = ["123", "John Doe", "30"]
manager.upsert(new_row)

# Delete a row
manager.delete(["123"])

# Save changes to file
manager.save()
import pandas as pd
import csv


def custom_split(csv_line, sep=',', strip=True):
    """Split one CSV line on *sep*, ignoring separators inside quotes.

    Each extracted value is optionally stripped of surrounding
    whitespace and has one pair of matching outer quotes removed.
    Empty values are dropped from the result.

    Bug fix: the final (tail) value is now passed through
    remove_outer_quotes like every other value — previously the last
    field on a line kept its surrounding quotes.
    """
    result = []
    in_quotes = False
    start = 0

    def _clean(value):
        # Shared post-processing applied to every extracted value.
        if strip:
            value = value.strip()
        return remove_outer_quotes(value)

    for i, char in enumerate(csv_line):
        if char in ('"', "'"):
            in_quotes = not in_quotes
        elif char in sep and not in_quotes:
            result.append(_clean(csv_line[start:i]))
            start = i + 1

    result.append(_clean(csv_line[start:]))
    return [x for x in result if x]  # drop empty values


def remove_outer_quotes(s):
    """Remove one pair of matching outer quotes from *s*.

    The string is returned unchanged when its ends don't match, aren't
    quote characters, or the quote character also appears inside the
    string (3+ occurrences total) — the outer pair is then ambiguous.
    """
    if len(s) < 2 or s[0] != s[-1] or s[0] not in ('"', "'"):
        return s
    if s.count(s[0]) >= 3:
        return s
    return s[1:-1]


class CustomCSVParser:
    """CSV lookup table parsed with the quote-aware custom_split.

    The first *skiprows* lines of the file are ignored as a preamble;
    the next line becomes the title row and all following non-empty
    lines become data rows (lists of strings).
    """

    def __init__(self, file_path, skiprows=3):
        self.data = []  # data rows, each a list of column value strings
        with open(file_path, 'r') as f:
            lines = f.readlines()
            # The header row comes right after the skipped preamble.
            self.titles = custom_split(lines[skiprows].strip())
            data_lines = lines[skiprows + 1:]
            for line in data_lines:
                line = line.strip()
                if line:
                    row = custom_split(line)
                    self.data.append(row)

    def convertToCsv(self, dn, list_val, param_name, value):
        """Return the mapped value for *value* from the row matching
        (dn, list_val, param_name).

        The 'value' column is expected to hold comma-separated
        "key:val" pairs; the val of the pair whose key equals *value*
        is returned.  On any failure the exception message is returned
        as a string rather than raised.
        """
        try:
            dn_index = self.titles.index('DN')
            list_index = self.titles.index('list')
            param_index = self.titles.index('param abbreviated name')
            value_index = self.titles.index('value')

            for row in self.data:
                row_list = row[list_index]
                # Debug output left in by the author.
                print(len(row_list), len(list_val), len("''"))
                if row_list == list_val:
                    print("same!")
                if row[dn_index] == dn and row_list == list_val and row[param_index] == param_name:
                    values = row[value_index].split(',')
                    for val_pair in values:
                        key, val = val_pair.split(':')
                        if key == value:
                            return val
                    raise ValueError("Specified value not found in CSV.")
            raise ValueError("Specified combination not found in CSV.")
        except Exception as e:
            # Errors are reported via the returned string, not raised.
            return str(e)

    def convertToCom(self, dn, list_val, param_name, value):
        """Reverse lookup: return the column label corresponding to
        *value* in the row matching (dn, list_val, param_name).

        NOTE(review): this method indexes self.data like a pandas
        DataFrame (boolean masks, .empty, .values, .columns), but
        __init__ builds a plain list of lists — as written the lookup
        raises TypeError, which the except clause converts into an
        error string.  Presumably the data was meant to be loaded with
        pandas here; confirm the intended data source and schema.
        """
        try:
            row = self.data[(self.data['DN'] == dn) & (self.data['list'] == list_val) & (
                self.data['param abbreviated name'] == param_name)]
            if not row.empty:
                if value in row.values[0][4:]:
                    idx = row.values[0][4:].tolist().index(value) + 1
                    return row.columns[idx]
                else:
                    raise ValueError("Specified value not found in CSV.")
            else:
                raise ValueError("Specified combination not found in CSV.")
        except Exception as e:
            # Errors are reported via the returned string, not raised.
            return str(e)


# Example usage:
# Module-level demo: requires "stepdata.csv" next to this file.
# NOTE(review): the expected outputs in the trailing comments do not
# obviously follow from convertToCsv's "key:val" parsing — verify
# against the real stepdata.csv contents.
converter = CustomCSVParser("stepdata.csv")
print(converter.convertToCsv("/aaa/bbb-/ccc-/ddd-",
      '', "data_name_1", "111"))  # Output: h2
print(converter.convertToCom("/aaa/bbb-/ccc-/ddd-",
      '', "data_name_1", "h2"))  # Output: 112