Coverage for class_generator/core/schema.py: 18%

143 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-07-29 12:31 +0300

1"""Schema management functions for resource definitions.""" 

2 

import json
import shlex
import shutil
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Any

from packaging.version import Version
from pyhelper_utils.shell import run_command
from simple_logger.logger import get_logger

from class_generator.constants import RESOURCES_MAPPING_FILE, SCHEMA_DIR
from ocp_resources.utils.schema_validator import SchemaValidator

16 

17LOGGER = get_logger(name=__name__) 

18 

19 

def get_client_binary() -> str:
    """Determine whether to use 'oc' or 'kubectl' binary.

    Prefers 'oc' when present on PATH, falling back to 'kubectl'.

    Returns:
        str: "oc" or "kubectl".

    Raises:
        RuntimeError: If neither binary is found in PATH.
    """
    # shutil.which replaces shelling out to `which` via run_command. It also
    # fixes the previous inverted checks: run_command's first return value is
    # a boolean success flag elsewhere in this module (`if not rc`,
    # `if success and output`), so `rc == 0` was true exactly when the
    # lookup FAILED.
    if shutil.which("oc"):
        return "oc"

    # Fall back to kubectl
    if shutil.which("kubectl"):
        return "kubectl"

    raise RuntimeError("Neither 'oc' nor 'kubectl' binary found in PATH")

33 

34 

def read_resources_mapping_file() -> dict[Any, Any]:
    """Return the resources mapping, preferring SchemaValidator's loader.

    When SchemaValidator cannot load the mapping (e.g. schema files do not
    exist yet during initial generation), fall back to reading
    RESOURCES_MAPPING_FILE directly; a missing or malformed file yields {}.
    """
    if not SchemaValidator.load_mappings_data():
        # Fallback for cases where schema files don't exist yet
        # (e.g., initial generation).
        try:
            with open(RESOURCES_MAPPING_FILE) as handle:
                return json.load(handle)
        except (FileNotFoundError, json.JSONDecodeError):
            return {}

    return SchemaValidator._mappings_data or {}

47 

48 

def extract_group_kind_version(_kind_schema: dict[str, Any]) -> dict[str, str]:
    """Extract group, kind, and version from schema.

    Selects the first entry of "x-kubernetes-group-version-kind" that has a
    non-empty "group"; when none does, falls back to the FIRST entry.

    The previous implementation reused the result variable as the for-loop
    target, so when no entry matched it silently fell back to the LAST entry
    — inconsistent with the equivalent inline selection in
    update_kind_schema(), which keeps the first.

    Args:
        _kind_schema: Schema definition containing an
            "x-kubernetes-group-version-kind" list (assumed non-empty).

    Returns:
        dict[str, str]: The selected {"group", "version", "kind"} entry.

    Raises:
        KeyError: If "x-kubernetes-group-version-kind" is absent.
        IndexError: If the list is empty.
    """
    group_kind_versions: list[dict[str, str]] = _kind_schema["x-kubernetes-group-version-kind"]
    # First grouped entry wins; otherwise default to the first entry.
    return next(
        (gvk for gvk in group_kind_versions if gvk.get("group")),
        group_kind_versions[0],
    )

59 

60 

def get_server_version(client: str) -> str:
    """Query the cluster for its server version string.

    Args:
        client: CLI binary name ("oc" or "kubectl").

    Returns:
        str: The server's gitVersion value.

    Exits the process with status 1 when the version command fails.
    """
    version_cmd = f"{client} version -o json"
    ok, output, _ = run_command(command=shlex.split(version_cmd), check=False)
    if not ok:
        LOGGER.error("Failed to get server version")
        sys.exit(1)

    server_version = json.loads(output)["serverVersion"]["gitVersion"]
    LOGGER.info(f"Server version: {server_version}")
    return server_version

72 

73 

def build_namespacing_dict(client: str) -> dict[str, bool]:
    """Map each resource KIND to whether it is namespace-scoped.

    Runs `<client> api-resources` twice — once for namespaced and once for
    cluster-scoped resources — and records the KIND column of every row.

    Args:
        client: CLI binary name ("oc" or "kubectl").

    Returns:
        dict[str, bool]: KIND -> True when namespaced, False otherwise.
    """
    kind_scopes: dict[str, bool] = {}

    for is_namespaced in (True, False):
        command = f"{client} api-resources --namespaced={str(is_namespaced).lower()} --no-headers"
        ok, listing, _ = run_command(command=shlex.split(command), check=False, log_errors=False)
        if not ok or not listing:
            continue

        for row in listing.strip().split("\n"):
            columns = row.split()
            if not columns:
                continue
            # KIND is the last column of `api-resources` output.
            kind_scopes[columns[-1]] = is_namespaced

    LOGGER.info(f"Built namespacing dictionary with {len(kind_scopes)} resources")
    return kind_scopes

92 

93 

def update_kind_schema() -> None:
    """Update schema files using OpenAPI v3 endpoints.

    Fetches the cluster's OpenAPI v3 index, downloads every API group's
    schema document in parallel, and rewrites:
      - SCHEMA_DIR/_definitions.json (all unique schema definitions)
      - RESOURCES_MAPPING_FILE (kind -> api_version/api_group/namespaced)
      - the cluster version marker file (only when the cluster is the same
        or newer than the last generation)

    Exits the process with status 1 on unrecoverable failures (missing
    version file, unreachable OpenAPI index).
    """
    client = get_client_binary()

    # Build namespacing dictionary once
    namespacing_dict = build_namespacing_dict(client=client)

    # Get v3 API index
    LOGGER.info("Fetching OpenAPI v3 index...")
    success, v3_data, _ = run_command(command=shlex.split(f"{client} get --raw /openapi/v3"), check=False)
    if not success:
        LOGGER.error("Failed to fetch OpenAPI v3 index")
        sys.exit(1)

    v3_index = json.loads(v3_data)
    paths = v3_index.get("paths", {})
    LOGGER.info(f"Found {len(paths)} API groups to process")

    # Check and update cluster version
    # NOTE(review): path is relative to the current working directory, not
    # anchored to the package — confirm callers always run from repo root.
    cluster_version_file = Path("class_generator/schema/__cluster_version__.txt")
    last_cluster_version_generated: str = ""
    try:
        with open(cluster_version_file, "r") as fd:
            last_cluster_version_generated = fd.read().strip()
    except (FileNotFoundError, IOError) as exp:
        LOGGER.error(f"Failed to read cluster version file: {exp}")
        sys.exit(1)

    cluster_version = get_server_version(client=client)
    # Strip build metadata (e.g. "v1.29.3+abc123" -> "v1.29.3") so
    # packaging.Version can parse it.
    cluster_version = cluster_version.split("+")[0]

    same_or_newer_version: bool = Version(cluster_version) >= Version(last_cluster_version_generated)

    # Only the version marker is gated on the comparison; schema processing
    # below runs regardless of the cluster version.
    if same_or_newer_version:
        with open(cluster_version_file, "w") as fd:
            fd.write(cluster_version)

    # Ensure schema directory exists
    Path(SCHEMA_DIR).mkdir(parents=True, exist_ok=True)

    # Load existing resources mapping (merged into, then rewritten below)
    resources_mapping = read_resources_mapping_file()
    definitions = {}

    # Track processed schemas to avoid duplicates across API groups
    processed_schemas = set()
    total_schemas = 0

    # Function to fetch and process a single API group.
    # Returns (api_path, parsed-schema-dict) or (api_path, None) on any
    # failure (missing URL, fetch error, JSON parse error) — failures are
    # logged and skipped, never fatal.
    def fetch_api_group(api_path: str, api_info: dict[str, Any]) -> tuple[str, dict[str, Any] | None]:
        api_url = api_info.get("serverRelativeURL", "")
        if not api_url:
            return api_path, None

        LOGGER.info(f"Processing {api_path}...")
        success, schema_data, _ = run_command(command=shlex.split(f"{client} get --raw {api_url}"), check=False)

        if not success:
            LOGGER.warning(f"Failed to fetch schema for {api_path}")
            return api_path, None

        try:
            schema = json.loads(schema_data)
            return api_path, schema
        except json.JSONDecodeError as e:
            LOGGER.warning(f"Failed to parse schema for {api_path}: {e}")
            return api_path, None

    # Use ThreadPoolExecutor to parallelize API fetching (network-bound)
    with ThreadPoolExecutor(max_workers=10) as executor:
        # Submit all fetch tasks
        future_to_path = {
            executor.submit(fetch_api_group, api_path, api_info): api_path
            for api_path, api_info in paths.items()
        }

        # Process results as they complete
        for future in as_completed(future_to_path):
            api_path, schema = future.result()

            if not schema:
                continue

            # Process schema definitions
            # NOTE(review): OpenAPI v3 documents normally nest schemas under
            # "components" -> "schemas"; "definitions" is the v2 key.
            # Confirm these endpoints actually return a "definitions"
            # section, otherwise this loop never iterates.
            for def_name, def_data in schema.get("definitions", {}).items():
                if def_name in processed_schemas:
                    continue

                processed_schemas.add(def_name)

                # Extract schema info; skip definitions without GVK metadata
                gvk_list = def_data.get("x-kubernetes-group-version-kind", [])
                if not gvk_list:
                    continue

                # Get the proper GVK: first entry with a non-empty group
                # wins, else fall back to the first entry.
                group_kind_version = gvk_list[0]
                for gvk in gvk_list:
                    if gvk.get("group"):
                        group_kind_version = gvk
                        break

                kind = group_kind_version.get("kind", "")
                group = group_kind_version.get("group", "")
                version = group_kind_version.get("version", "")

                if not kind:
                    continue

                # Determine if resource is namespaced; unknown kinds default
                # to namespaced=True
                is_namespaced = namespacing_dict.get(kind, True)

                # Build schema name: "<group>/<version>/<kind>" or
                # "<version>/<kind>" for core resources
                if group:
                    schema_name = f"{group}/{version}/{kind}"
                else:
                    schema_name = f"{version}/{kind}"

                # Update resources mapping; any existing entry for this kind
                # is overwritten
                resources_mapping[kind] = {
                    "api_version": f"{group}/{version}" if group else version,
                    "api_group": group,
                    "namespaced": is_namespaced,
                }

                # Store schema data (only the selected GVK is kept)
                schema_data = {
                    "description": def_data.get("description", ""),
                    "properties": def_data.get("properties", {}),
                    "required": def_data.get("required", []),
                    "type": def_data.get("type", "object"),
                    "x-kubernetes-group-version-kind": [group_kind_version],
                }

                definitions[schema_name] = schema_data
                total_schemas += 1

    LOGGER.info(f"Processed {total_schemas} unique schemas")

    # Write updated definitions
    definitions_file = Path(SCHEMA_DIR) / "_definitions.json"
    with open(definitions_file, "w") as fd:
        json.dump(definitions, fd, indent=2, sort_keys=True)
    LOGGER.info(f"Written definitions to {definitions_file}")

    # Write updated resources mapping
    with open(RESOURCES_MAPPING_FILE, "w") as fd:
        json.dump(resources_mapping, fd, indent=2, sort_keys=True)
    LOGGER.info(f"Written resources mapping to {RESOURCES_MAPPING_FILE}")

    # Clear cached mapping data in SchemaValidator to force reload
    SchemaValidator._mappings_data = None
    SchemaValidator.load_mappings_data()